diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index b30a25414..ace780dd0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -72,25 +73,28 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { this.logger = logger; } - /** - * Must be overridden by concrete subclasses - * - * @param tracker rod data - * @param ref reference context - * @param contexts stratified alignment contexts - * @param contextType stratified context type - * @param priors priors to use for GLs - * @param alternateAlleleToUse the alternate allele to use, null if not set - * @param useBAQedPileup should we use the BAQed pileup or the raw one? - * @return variant context where genotypes are no-called but with GLs - */ - public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker, - ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenotypePriors priors, - Allele alternateAlleleToUse, - boolean useBAQedPileup); + /** + * Can be overridden by concrete subclasses + * + * @param tracker rod data + * @param ref reference context + * @param contexts stratified alignment contexts + * @param contextType stratified context type + * @param priors priors to use for GLs + * @param alternateAlleleToUse the alternate allele to use, null if not set + * @param useBAQedPileup should we use the BAQed pileup or the raw one? + * @param locParser Genome Loc Parser + * @return variant context where genotypes are no-called but with GLs + */ + public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker, + ReferenceContext ref, + Map contexts, + AlignmentContextUtils.ReadOrientation contextType, + GenotypePriors priors, + Allele alternateAlleleToUse, + boolean useBAQedPileup, + GenomeLocParser locParser); + protected int getFilteredDepth(ReadBackedPileup pileup) { int count = 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 8d279005b..0756caf03 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; @@ -54,17 +55,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private final boolean getAlleleListFromVCF; private boolean DEBUG = false; - + private final boolean doMultiAllelicCalls; private boolean ignoreSNPAllelesWhenGenotypingIndels = false; - + private final int maxAlternateAlleles; private PairHMMIndelErrorModel pairModel; private static ThreadLocal>> indelLikelihoodMap = new ThreadLocal>>() { - protected synchronized HashMap> initialValue() { - return new HashMap>(); - } - }; + protected synchronized HashMap> initialValue() { + return new HashMap>(); + } + }; private LinkedHashMap haplotypeMap; @@ -87,6 +88,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING; HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE; DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO; + maxAlternateAlleles = UAC.MAX_ALTERNATE_ALLELES; + doMultiAllelicCalls = UAC.MULTI_ALLELIC; haplotypeMap = new LinkedHashMap(); ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES; @@ -95,7 +98,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private ArrayList computeConsensusAlleles(ReferenceContext ref, Map contexts, - AlignmentContextUtils.ReadOrientation contextType) { + AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) { Allele refAllele=null, altAllele=null; GenomeLoc loc = ref.getLocus(); ArrayList aList = new ArrayList(); @@ -114,7 +117,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping) return aList; - + for ( Map.Entry sample : contexts.entrySet() ) { // todo -- warning, can be duplicating expensive partition here AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); @@ -126,9 +129,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) { //SAMRecord read = p.getRead(); - GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead()); + GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead()); if (read == null) - continue; + continue; if(ReadUtils.is454Read(read)) { continue; } @@ -208,63 +211,69 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } } -/* if (DEBUG) { - int icount = indelPileup.getNumberOfInsertions(); - int dcount = indelPileup.getNumberOfDeletions(); - if (icount + dcount > 0) - { - List> eventStrings = indelPileup.getEventStringsWithCounts(ref.getBases()); - System.out.format("#ins: %d, #del:%d\n", insCount, delCount); - - for (int i=0 ; i < eventStrings.size() ; i++ ) { - System.out.format("%s:%d,",eventStrings.get(i).first,eventStrings.get(i).second); - // int k=0; - } - System.out.println(); - } - } */ } + Collection vcs = new ArrayList(); int maxAlleleCnt = 0; String bestAltAllele = ""; + for (String s : consensusIndelStrings.keySet()) { - int curCnt = consensusIndelStrings.get(s); - if (curCnt > maxAlleleCnt) { - maxAlleleCnt = curCnt; - bestAltAllele = s; + int curCnt = consensusIndelStrings.get(s), stop = 0; + // if observed count if above minimum threshold, we will genotype this allele + if (curCnt < minIndelCountForGenotyping) + continue; + + if (s.startsWith("D")) { + // get deletion length + int dLen = Integer.valueOf(s.substring(1)); + // get ref bases of accurate deletion + int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart(); + stop = loc.getStart() + dLen; + byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen); + + if (Allele.acceptableAlleleBases(refBases)) { + refAllele = Allele.create(refBases,true); + altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + } + } + else { + // insertion case + if (Allele.acceptableAlleleBases(s)) { + refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); + altAllele = Allele.create(s, false); + stop = loc.getStart(); + } } -// if (DEBUG) -// System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) ); - } //gdebug- - if (maxAlleleCnt < minIndelCountForGenotyping) - return aList; - if (bestAltAllele.startsWith("D")) { - // get deletion length - int dLen = Integer.valueOf(bestAltAllele.substring(1)); - // get ref bases of accurate deletion - int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart(); + ArrayList vcAlleles = new ArrayList(); + vcAlleles.add(refAllele); + vcAlleles.add(altAllele); - //System.out.println(new String(ref.getBases())); - byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen); + final VariantContextBuilder builder = new VariantContextBuilder().source(""); + builder.loc(loc.getContig(), loc.getStart(), stop); + builder.alleles(vcAlleles); + builder.referenceBaseForIndel(ref.getBase()); + builder.noGenotypes(); + if (doMultiAllelicCalls) + vcs.add(builder.make()); + else { + if (curCnt > maxAlleleCnt) { + maxAlleleCnt = curCnt; + vcs.clear(); + vcs.add(builder.make()); + } - if (Allele.acceptableAlleleBases(refBases)) { - refAllele = Allele.create(refBases,true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); } } - else { - // insertion case - if (Allele.acceptableAlleleBases(bestAltAllele)) { - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(bestAltAllele, false); - } - } - if (refAllele != null && altAllele != null) { - aList.add(0,refAllele); - aList.add(1,altAllele); - } + + if (vcs.isEmpty()) + return aList; // nothing else to do, no alleles passed minimum count criterion + + VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false); + + aList = new ArrayList(mergedVC.getAlleles()); + return aList; } @@ -277,7 +286,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood AlignmentContextUtils.ReadOrientation contextType, GenotypePriors priors, Allele alternateAlleleToUse, - boolean useBAQedPileup) { + boolean useBAQedPileup, GenomeLocParser locParser) { if ( tracker == null ) return null; @@ -294,17 +303,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood haplotypeMap.clear(); if (getAlleleListFromVCF) { - for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) { - if( vc_input != null && - allowableTypes.contains(vc_input.getType()) && - ref.getLocus().getStart() == vc_input.getStart()) { - vc = vc_input; - break; - } - } - // ignore places where we don't have a variant - if ( vc == null ) - return null; + for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) { + if( vc_input != null && + allowableTypes.contains(vc_input.getType()) && + ref.getLocus().getStart() == vc_input.getStart()) { + vc = vc_input; + break; + } + } + // ignore places where we don't have a variant + if ( vc == null ) + return null; alleleList.clear(); if (ignoreSNPAllelesWhenGenotypingIndels) { @@ -323,7 +332,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } else { - alleleList = computeConsensusAlleles(ref,contexts, contextType); + alleleList = computeConsensusAlleles(ref,contexts, contextType, locParser); if (alleleList.isEmpty()) return null; } @@ -340,7 +349,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood if (alleleList.isEmpty()) return null; - + refAllele = alleleList.get(0); altAllele = alleleList.get(1); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index eee89674a..81c766e4d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -30,10 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.StingException; @@ -66,7 +63,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC final AlignmentContextUtils.ReadOrientation contextType, final GenotypePriors priors, final Allele alternateAlleleToUse, - final boolean useBAQedPileup) { + final boolean useBAQedPileup, + final GenomeLocParser locParser) { if ( !(priors instanceof DiploidSNPGenotypePriors) ) throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model"); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 4639d67a7..16159393f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -109,7 +109,7 @@ public class UnifiedArgumentCollection { * For advanced users only. */ @Advanced - @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles (SNPs only)", required = false) + @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles", required = false) public boolean MULTI_ALLELIC = false; /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 34be88dbb..5d73e8d28 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -243,7 +243,7 @@ public class UnifiedGenotyperEngine { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine); + return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) {