diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java index f6ce818be..4c20700ac 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyGenotypeLikelihoodsCalculationModel.java @@ -41,15 +41,6 @@ import java.util.*; public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { - //protected Set laneIDs; - public enum Model { - SNP, - INDEL, - POOLSNP, - POOLINDEL, - BOTH - } - final protected UnifiedArgumentCollection UAC; protected GeneralPloidyGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { @@ -203,7 +194,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G final AlignmentContextUtils.ReadOrientation contextType, final List allAllelesToUse, final boolean useBAQedPileup, - final GenomeLocParser locParser) { + final GenomeLocParser locParser, + final Map perReadAlleleLikelihoodMap) { HashMap perLaneErrorModels = getPerLaneErrorModels(tracker, ref, contexts); if (perLaneErrorModels == null && UAC.referenceSampleName != null) @@ -215,8 +207,11 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G newContext.put(DUMMY_SAMPLE_NAME,mergedContext); contexts = newContext; } - - // get initial alleles to genotype + if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) { + // starting a new site: clear allele list + perReadAlleleLikelihoodMap.clear(); // clean mapping sample-> per read, per allele likelihoods + } + // get initial alleles to genotype final List allAlleles = new ArrayList(); if (allAllelesToUse == null || allAllelesToUse.isEmpty()) allAlleles.addAll(getInitialAllelesToUse(tracker, ref,contexts,contextType,locParser, allAllelesToUse)); @@ -234,9 +229,13 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G continue; ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); + if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){ + // no likelihoods have been computed for this sample at this site + perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap()); + } // create the GenotypeLikelihoods object - final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO); + final GeneralPloidyGenotypeLikelihoods GL = getPoolGenotypeLikelihoodObject(allAlleles, null, UAC.samplePloidy, perLaneErrorModels, useBAQedPileup, ref, UAC.IGNORE_LANE_INFO, perReadAlleleLikelihoodMap.get(sample.getKey())); // actually compute likelihoods final int nGoodBases = GL.add(pileup, UAC); if ( nGoodBases > 0 ) @@ -333,7 +332,8 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G final HashMap perLaneErrorModels, final boolean useBQAedPileup, final ReferenceContext ref, - final boolean ignoreLaneInformation); + final boolean ignoreLaneInformation, + final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap); protected abstract List getInitialAllelesToUse(final RefMetaDataTracker tracker, final ReferenceContext ref, diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java index 4f42f820e..e562bd265 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoods.java @@ -26,6 +26,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype double[][] readHaplotypeLikelihoods; final byte refBase; + final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap; public GeneralPloidyIndelGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, @@ -34,7 +35,8 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype final boolean ignoreLaneInformation, final PairHMMIndelErrorModel pairModel, final LinkedHashMap haplotypeMap, - final ReferenceContext referenceContext) { + final ReferenceContext referenceContext, + final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) { super(alleles, logLikelihoods, ploidy, perLaneErrorModels, ignoreLaneInformation); this.pairModel = pairModel; this.haplotypeMap = haplotypeMap; @@ -42,6 +44,7 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles); // todo - not needed if indel alleles have base at current position this.refBase = referenceContext.getBase(); + this.perReadAlleleLikelihoodMap = perReadAlleleLikelihoodMap; } // ------------------------------------------------------------------------------------- @@ -142,10 +145,9 @@ public class GeneralPloidyIndelGenotypeLikelihoods extends GeneralPloidyGenotype List numSeenBases = new ArrayList(this.alleles.size()); if (!hasReferenceSampleData) { - final int numHaplotypes = haplotypeMap.size(); final int readCounts[] = new int[pileup.getNumberOfElements()]; - readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts); + readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, perReadAlleleLikelihoodMap, readCounts); n = readHaplotypeLikelihoods.length; } else { Allele refAllele = null; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java index f6559f666..fc0c526bc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GeneralPloidyIndelGenotypeLikelihoodsCalculationModel.java @@ -73,8 +73,9 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener final HashMap perLaneErrorModels, final boolean useBQAedPileup, final ReferenceContext ref, - final boolean ignoreLaneInformation){ - return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref); + final boolean ignoreLaneInformation, + final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){ + return new GeneralPloidyIndelGenotypeLikelihoods(alleles, logLikelihoods, ploidy,perLaneErrorModels,ignoreLaneInformation, pairModel, haplotypeMap, ref, perReadAlleleLikelihoodMap); } protected List getInitialAllelesToUse(final RefMetaDataTracker tracker, @@ -90,7 +91,6 @@ public class GeneralPloidyIndelGenotypeLikelihoodsCalculationModel extends Gener if (alleles.size() > MAX_NUM_ALLELES_TO_GENOTYPE) alleles = alleles.subList(0,MAX_NUM_ALLELES_TO_GENOTYPE); if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) { - IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().clear(); haplotypeMap.clear(); } IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(alleles, ref, ref.getLocus(), haplotypeMap); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index ec4fb3950..cde76ca7a 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.gatk.walkers.genotyper.*; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.commandline.*; @@ -44,10 +45,6 @@ import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; -import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; -import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.codecs.vcf.*; @@ -417,7 +414,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem : genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) { if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); } - final Map>> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult ); + final Map stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult ); final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst()); // add some custom annotations to the calls diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index fabf5633f..9ba434100 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -323,11 +324,13 @@ public class LikelihoodCalculationEngine { return bestHaplotypes; } - public static Map>> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap> perSampleReadList, final HashMap> perSampleFilteredReadList, final Pair>> call) { - final Map>> returnMap = new HashMap>>(); + public static Map partitionReadsBasedOnLikelihoods( final GenomeLocParser parser, final HashMap> perSampleReadList, final HashMap> perSampleFilteredReadList, final Pair>> call) { + final Map returnMap = new HashMap(); final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst()); for( final Map.Entry> sample : perSampleReadList.entrySet() ) { - final Map> alleleReadMap = new HashMap>(); + //final Map> alleleReadMap = new HashMap>(); + final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); + final ArrayList readsForThisSample = sample.getValue(); for( int iii = 0; iii < readsForThisSample.size(); iii++ ) { final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same! @@ -335,51 +338,31 @@ public class LikelihoodCalculationEngine { if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) { final double likelihoods[] = new double[call.getFirst().getAlleles().size()]; int count = 0; - for( final Allele a : call.getFirst().getAlleles() ) { // find the allele with the highest haplotype likelihood - double maxLikelihood = Double.NEGATIVE_INFINITY; + + for( final Allele a : call.getFirst().getAlleles() ) { for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object) final double likelihood = h.getReadLikelihoods(sample.getKey())[iii]; - if( likelihood > maxLikelihood ) { - maxLikelihood = likelihood; - } - } - likelihoods[count++] = maxLikelihood; - } - final int bestAllele = MathUtils.maxElementIndex(likelihoods); - final double bestLikelihood = likelihoods[bestAllele]; - Allele allele = Allele.NO_CALL; - boolean isInformativeRead = false; - for( final double likelihood : likelihoods ) { - if( bestLikelihood - likelihood > BEST_LIKELIHOOD_THRESHOLD ) { - isInformativeRead = true; - break; + likelihoodMap.add(read, a, likelihood); } } - // uninformative reads get the no call Allele - if( isInformativeRead ) { - allele = call.getFirst().getAlleles().get(bestAllele); - } - List readList = alleleReadMap.get(allele); - if( readList == null ) { - readList = new ArrayList(); - alleleReadMap.put(allele, readList); - } - readList.add(read); } } - // add all filtered reads to the NO_CALL list because they weren't given any likelihoods +/* // add all filtered reads to the NO_CALL list because they weren't given any likelihoods List readList = alleleReadMap.get(Allele.NO_CALL); if( readList == null ) { readList = new ArrayList(); alleleReadMap.put(Allele.NO_CALL, readList); } - for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) { + */ + /* for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) { // only count the read if it overlaps the event, otherwise it is not added to the output read list at all if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) { readList.add(read); } } - returnMap.put(sample.getKey(), alleleReadMap); + */ + returnMap.put(sample.getKey(), likelihoodMap); + } return returnMap; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index 56cb6c3d4..71aee44b8 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -201,7 +201,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { // compute mean number of reduced read counts in current kmer span final byte[] counts = Arrays.copyOfRange(reducedReadCounts,iii,iii+KMER_LENGTH+1); // precise rounding can make a difference with low consensus counts - countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length); + countNumber = MathUtils.arrayMax(counts); + // countNumber = (int)Math.round((double)MathUtils.sum(counts)/counts.length); } if( !badKmer ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java index 30f81b20c..a68f0df21 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -51,7 +52,12 @@ public class AlleleBalance extends InfoFieldAnnotation { char[] BASES = {'A','C','G','T'}; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java index c3b6de65a..3cbca4f52 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java @@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -52,7 +53,12 @@ import java.util.Map; */ public class BaseCounts extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java index bd884892c..3f1eaa139 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java @@ -2,6 +2,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -21,66 +23,40 @@ public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnot public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("BaseQRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities")); } - protected void fillQualsFromPileup(byte ref, List alts, ReadBackedPileup pileup, List refQuals, List altQuals) { - for ( final PileupElement p : pileup ) { - if( isUsableBase(p) ) { - if ( p.getBase() == ref ) - refQuals.add((double)p.getQual()); - else if ( alts.contains(p.getBase()) ) - altQuals.add((double)p.getQual()); - } - } - } - protected void fillQualsFromPileup(final Allele ref, final List alts, final int refLoc, final Map> stratifiedContext, final List refQuals, final List altQuals) { - // TODO -- implement me; how do we pull out the correct offset from the read? - return; - -/* - for ( final Map.Entry> alleleBin : stratifiedContext.entrySet() ) { - final boolean matchesRef = ref.equals(alleleBin.getKey()); - final boolean matchesAlt = alts.contains(alleleBin.getKey()); - if ( !matchesRef && !matchesAlt ) - continue; - - for ( final GATKSAMRecord read : alleleBin.getValue() ) { + protected void fillQualsFromPileup(final List allAlleles, final int refLoc, + final ReadBackedPileup pileup, + final PerReadAlleleLikelihoodMap alleleLikelihoodMap, + final List refQuals, final List altQuals){ + if (alleleLikelihoodMap == null) { + // use fast SNP-based version if we don't have per-read allele likelihoods + for ( final PileupElement p : pileup ) { if ( isUsableBase(p) ) { - if ( matchesRef ) + if ( allAlleles.get(0).equals(Allele.create(p.getBase())) ) { refQuals.add((double)p.getQual()); - else + } else if ( allAlleles.contains(Allele.create(p.getBase()))) { altQuals.add((double)p.getQual()); - } - } - } -*/ - } - - protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals) { - // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ? - HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - for (final PileupElement p: pileup) { - if (indelLikelihoodMap.containsKey(p)) { - // retrieve likelihood information corresponding to this read - LinkedHashMap el = indelLikelihoodMap.get(p); - // by design, first element in LinkedHashMap was ref allele - double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - - for (Map.Entry entry : el.entrySet()) { - - if (entry.getKey().isReference()) - refLikelihood = entry.getValue(); - else { - double like = entry.getValue(); - if (like >= altLikelihood) - altLikelihood = like; } } - if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH) - refQuals.add(-10.0*refLikelihood); - else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH) - altQuals.add(-10.0*altLikelihood); } + return; + } + + for (Map.Entry> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) { + if (!isUsableBase(el.getKey())) + continue; + + final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); + if (a.isNoCall()) + continue; // read is non-informative + if (a.isReference()) + refQuals.add(-10.0*(double)el.getValue().get(a)); + else if (allAlleles.contains(a)) + altQuals.add(-10.0*(double)el.getValue().get(a)); + + } } + } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 54837baad..4ae1a0bba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -61,7 +62,12 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn private Set founderIds = new HashSet(); - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map perReadAlleleLikelihoodMap ) { if ( ! vc.hasGenotypes() ) return null; @@ -73,13 +79,6 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn founderIds = ((Walker)walker).getSampleDB().getFounderIds(); } - public Map annotate(Map>> stratifiedContexts, VariantContext vc) { - if ( ! vc.hasGenotypes() ) - return null; - - return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap(), true); - } - public List getKeyNames() { return Arrays.asList(keyNames); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java index f41a40621..fdbbf6732 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ClippingRankSumTest.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -24,68 +25,26 @@ public class ClippingRankSumTest extends RankSumTest { public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ClippingRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases")); } - protected void fillQualsFromPileup(byte ref, List alts, ReadBackedPileup pileup, List refQuals, List altQuals) { - return; - // This working implementation below needs to be tested for the UG pipeline - /* - for ( final PileupElement p : pileup ) { - if ( isUsableBase(p) ) { - if ( p.getBase() == ref ) { - refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead())); - } else if ( alts.contains(p.getBase()) ) { - altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead())); - } - } - } - */ - } - protected void fillQualsFromPileup(final Allele ref, final List alts, final int refLoc, final Map> stratifiedContext, final List refQuals, final List altQuals) { - for ( final Map.Entry> alleleBin : stratifiedContext.entrySet() ) { - final boolean matchesRef = ref.equals(alleleBin.getKey()); - final boolean matchesAlt = alts.contains(alleleBin.getKey()); - if ( !matchesRef && !matchesAlt ) - continue; + protected void fillQualsFromPileup(final List allAlleles, + final int refLoc, + final ReadBackedPileup pileup, + final PerReadAlleleLikelihoodMap likelihoodMap, final List refQuals, final List altQuals) { + // todo - only support non-pileup case for now, e.g. active-region based version + if (pileup != null) + return; + + for (Map.Entry> el : likelihoodMap.getLikelihoodReadMap().entrySet()) { + + final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); + if (a.isNoCall()) + continue; // read is non-informative + if (a.isReference()) + refQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey().getRead())); + else if (allAlleles.contains(a)) + altQuals.add((double)AlignmentUtils.getNumHardClippedBases(el.getKey().getRead())); - for ( final GATKSAMRecord read : alleleBin.getValue() ) { - if ( matchesRef ) - refQuals.add((double)AlignmentUtils.getNumHardClippedBases(read)); - else - altQuals.add((double)AlignmentUtils.getNumHardClippedBases(read)); - } } } - protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals) { - return; - // This working implementation below needs to be tested for the UG pipeline - - /* - // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ? - HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - for (final PileupElement p: pileup) { - if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) { - // retrieve likelihood information corresponding to this read - LinkedHashMap el = indelLikelihoodMap.get(p); - // by design, first element in LinkedHashMap was ref allele - double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - - for (Allele a : el.keySet()) { - - if (a.isReference()) - refLikelihood =el.get(a); - else { - double like = el.get(a); - if (like >= altLikelihood) - altLikelihood = like; - } - } - if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH) - refQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead())); - else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH) - altQuals.add((double)AlignmentUtils.getNumHardClippedBases(p.getRead())); - } - } - */ - } -} + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index 28ca77f18..8f67414fa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; @@ -38,28 +39,30 @@ import java.util.Map; */ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( stratifiedContexts.size() == 0 ) - return null; + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map perReadAlleleLikelihoodMap ) { int depth = 0; - for ( Map.Entry sample : stratifiedContexts.entrySet() ) - depth += sample.getValue().getBasePileup().depthOfCoverage(); - Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%d", depth)); - return map; - } + if (stratifiedContexts != null) { + if ( stratifiedContexts.size() == 0 ) + return null; - public Map annotate(Map>> stratifiedContexts, VariantContext vc) { - if ( stratifiedContexts.size() == 0 ) - return null; - - int depth = 0; - for ( final Map> alleleBins : stratifiedContexts.values() ) { - for ( final List alleleBin : alleleBins.values() ) { - depth += alleleBin.size(); - } + for ( Map.Entry sample : stratifiedContexts.entrySet() ) + depth += sample.getValue().getBasePileup().depthOfCoverage(); } + else if (perReadAlleleLikelihoodMap != null) { + if ( perReadAlleleLikelihoodMap.size() == 0 ) + return null; + + for ( Map.Entry sample : perReadAlleleLikelihoodMap.entrySet() ) + depth += sample.getValue().getLikelihoodReadMap().size(); + } + else + return null; Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%d", depth)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java index a9edab752..cd8faf093 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java @@ -42,7 +42,13 @@ import java.util.List; */ public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation { - public void annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g, GenotypeBuilder gb) { + public void annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final AlignmentContext stratifiedContext, + final VariantContext vc, + final Genotype g, + final GenotypeBuilder gb) { if ( g == null || !g.isCalled() ) return; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index 131670599..610d5e7b0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -54,21 +55,29 @@ import java.util.*; public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { private static final String FS = "FS"; private static final double MIN_PVALUE = 1E-320; - - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( !vc.isVariant() ) return null; int[][] table; - if ( vc.isSNP() ) + if (stratifiedPerReadAlleleLikelihoodMap != null) { + table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount()); + } + else if (vc.isSNP() && stratifiedContexts != null) { table = getSNPContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount()); - else if ( vc.isIndel() || vc.isMixed() ) { - table = getIndelContingencyTable(stratifiedContexts); - if (table == null) - return null; } else + // for non-snp variants, we need per-read likelihoods. + // for snps, we can get same result from simple pileup + return null; + + if (table == null) return null; Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE); @@ -80,22 +89,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat return map; } - public Map annotate(Map>> stratifiedContexts, VariantContext vc) { - if ( !vc.isVariant() ) - return null; - - final int[][] table = getContingencyTable(stratifiedContexts, vc.getReference(), vc.getAltAlleleWithHighestAlleleCount()); - - final Double pvalue = Math.max(pValueForContingencyTable(table), MIN_PVALUE); - if ( pvalue == null ) - return null; - - final Map map = new HashMap(); - map.put(FS, String.format("%.3f", QualityUtils.phredScaleErrorRate(pvalue))); - return map; - - } - public List getKeyNames() { return Arrays.asList(FS); } @@ -161,7 +154,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat table[0][1] += 1; table[1][1] -= 1; - return (table[0][0] >= 0 && table[1][1] >= 0) ? true : false; + return (table[0][0] >= 0 && table[1][1] >= 0); } private static boolean unrotateTable(int[][] table) { @@ -171,7 +164,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat table[0][1] -= 1; table[1][1] += 1; - return (table[0][1] >= 0 && table[1][0] >= 0) ? true : false; + return (table[0][1] >= 0 && table[1][0] >= 0); } private static double computePValue(int[][] table) { @@ -218,31 +211,29 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat * allele2 # # * @return a 2x2 contingency table */ - private static int[][] getContingencyTable(Map>> stratifiedContexts, Allele ref, Allele alt) { + private static int[][] getContingencyTable( final Map stratifiedPerReadAlleleLikelihoodMap, + final Allele ref, final Allele alt) { int[][] table = new int[2][2]; - for ( final Map> alleleBins : stratifiedContexts.values() ) { - for ( final Map.Entry> alleleBin : alleleBins.entrySet() ) { + for (PerReadAlleleLikelihoodMap maps : stratifiedPerReadAlleleLikelihoodMap.values() ) { + for (Map.Entry> el : maps.getLikelihoodReadMap().entrySet()) { + final boolean matchesRef = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(ref); + final boolean matchesAlt = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()).equals(alt); - final boolean matchesRef = ref.equals(alleleBin.getKey()); - final boolean matchesAlt = alt.equals(alleleBin.getKey()); if ( !matchesRef && !matchesAlt ) continue; - for ( final GATKSAMRecord read : alleleBin.getValue() ) { - boolean isFW = read.getReadNegativeStrandFlag(); + boolean isFW = el.getKey().getRead().getReadNegativeStrandFlag(); - int row = matchesRef ? 0 : 1; - int column = isFW ? 0 : 1; + int row = matchesRef ? 0 : 1; + int column = isFW ? 0 : 1; - table[row][column]++; - } + table[row][column]++; } } return table; } - /** Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: * fw rc @@ -275,69 +266,5 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat return table; } - /** - Allocate and fill a 2x2 strand contingency table. In the end, it'll look something like this: - * fw rc - * allele1 # # - * allele2 # # - * @return a 2x2 contingency table - */ - private static int[][] getIndelContingencyTable(Map stratifiedContexts) { - final double INDEL_LIKELIHOOD_THRESH = 0.3; - final HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - if (indelLikelihoodMap == null) - return null; - - int[][] table = new int[2][2]; - - for ( Map.Entry sample : stratifiedContexts.entrySet() ) { - final AlignmentContext context = sample.getValue(); - if ( context == null ) - continue; - - final ReadBackedPileup pileup = context.getBasePileup(); - for ( final PileupElement p : pileup ) { - if ( ! RankSumTest.isUsableBase(p, true) || p.getRead().isReducedRead() ) // ignore reduced reads - continue; - if ( indelLikelihoodMap.containsKey(p) ) { - // to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element. - // A pileup element then has a list of pairs of form (Allele, likelihood of this allele). - // To classify a pileup element as Ref or Alt, we look at the likelihood of corresponding alleles. - // If likelihood of ref allele > highest likelihood of all alt alleles + epsilon, then this pileup element is "ref" - // otherwise if highest alt allele likelihood is > ref likelihood + epsilon, then this pileup element it "alt" - // retrieve likelihood information corresponding to this read - LinkedHashMap el = indelLikelihoodMap.get(p); - // by design, first element in LinkedHashMap was ref allele - boolean isFW = !p.getRead().getReadNegativeStrandFlag(); - - double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - - for (Map.Entry entry : el.entrySet()) { - - if (entry.getKey().isReference()) - refLikelihood = entry.getValue(); - else { - double like = entry.getValue(); - if (like >= altLikelihood) - altLikelihood = like; - } - } - - boolean matchesRef = (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)); - boolean matchesAlt = (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)); - if ( matchesRef || matchesAlt ) { - int row = matchesRef ? 0 : 1; - int column = isFW ? 0 : 1; - - table[row][column]++; - } - - - } - } - } - - return table; - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java index fba30b3f7..3fe5c5837 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -25,7 +26,12 @@ import java.util.Map; @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { double content = computeGCContent(ref); Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%.2f", content)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index c6d8883c5..e01c51f4b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; @@ -60,7 +61,12 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; private final static char REGEXP_WILDCARD = '.'; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if (stratifiedContexts.size() == 0) // size 0 means that call was made by someone else and we have no data here return null; @@ -88,7 +94,9 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot if (vc.isSNP()) scoreRA.add(scoreReadsAgainstHaplotypes(haplotypes, thisPileup, contextSize, locus)); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense else if (vc.isIndel() || vc.isMixed()) { - Double d = scoreIndelsAgainstHaplotypes(thisPileup); + if (stratifiedPerReadAlleleLikelihoodMap == null) + return null; + Double d = scoreIndelsAgainstHaplotypes(stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName())); if (d == null) return null; scoreRA.add(d); // Taking the simple average of all sample's score since the score can be negative and the RMS doesn't make sense @@ -177,7 +185,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot private Haplotype getHaplotypeFromRead(final PileupElement p, final int contextSize, final int locus) { final GATKSAMRecord read = p.getRead(); - int readOffsetFromPileup = p.getOffset(); final byte[] haplotypeBases = new byte[contextSize]; Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD); @@ -189,7 +196,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot byte[] readQuals = read.getBaseQualities(); readQuals = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readQuals); // Shift the location of the qual scores based on the Cigar string - readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus); + final int readOffsetFromPileup = AlignmentUtils.calcAlignmentByteArrayOffset(read.getCigar(), p, read.getAlignmentStart(), locus); final int baseOffsetStart = readOffsetFromPileup - (contextSize - 1) / 2; for (int i = 0; i < contextSize; i++) { @@ -346,31 +353,26 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot } - private Double scoreIndelsAgainstHaplotypes(final ReadBackedPileup pileup) { + private Double scoreIndelsAgainstHaplotypes(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap) { final ArrayList haplotypeScores = new ArrayList(); - final HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - - if (indelLikelihoodMap == null) + if (perReadAlleleLikelihoodMap.isEmpty()) return null; - for (final PileupElement p : pileup) { - if (indelLikelihoodMap.containsKey(p)) { - // retrieve likelihood information corresponding to this read - LinkedHashMap el = indelLikelihoodMap.get(p); + for (Map.Entry> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) { - // Score all the reads in the pileup, even the filtered ones - final double[] scores = new double[el.size()]; - int i = 0; - for (Map.Entry a : el.entrySet()) { - scores[i++] = -a.getValue(); - if (DEBUG) { - System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]); - } + // retrieve likelihood information corresponding to this read + // Score all the reads in the pileup, even the filtered ones + final double[] scores = new double[el.getValue().size()]; + int i = 0; + for (Map.Entry a : el.getValue().entrySet()) { + scores[i++] = -a.getValue(); + if (DEBUG) { + System.out.printf(" vs. haplotype %d = %f%n", i - 1, scores[i - 1]); } - - haplotypeScores.add(scores); } + + haplotypeScores.add(scores); } // indel likelihoods are strict log-probs, not phred scored diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java index 6ba85de07..06fa04526 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -29,7 +30,12 @@ public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgress private static final int MIN_GENOTYPE_QUALITY = 10; private static final int MIN_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { final GenotypesContext genotypes = vc.getGenotypes(); if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java index 9f20bf375..5891cbc69 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -22,7 +23,12 @@ public class HomopolymerRun extends InfoFieldAnnotation { private boolean ANNOTATE_INDELS = true; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( !vc.isBiallelic() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index 715895526..64be64afa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -33,17 +34,18 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno private static final int MIN_SAMPLES = 10; private Set founderIds; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map perReadAlleleLikelihoodMap ) { //If available, get the founder IDs and cache them. the IC will only be computed on founders then. - if(founderIds == null) + if(founderIds == null && walker != null) founderIds = ((Walker)walker).getSampleDB().getFounderIds(); return calculateIC(vc); } - public Map annotate(Map>> stratifiedContexts, VariantContext vc) { - return calculateIC(vc); - } - private Map calculateIC(final VariantContext vc) { final GenotypesContext genotypes = (founderIds == null || founderIds.isEmpty()) ? vc.getGenotypes() : vc.getGenotypes(founderIds); if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java index babaf7ee6..5f405cb46 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.IndelUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -18,9 +19,14 @@ import java.util.*; */ public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { - int run; + int run; if (vc.isMixed()) { Map map = new HashMap(); map.put(getKeyNames().get(0), String.format("%s", "MIXED")); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java index 7f5033adf..4be601bc8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -21,7 +22,12 @@ import java.util.Map; */ public class LowMQ extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index b6f24433e..3136a696d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -32,7 +33,12 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment private String fatherId; private String childId; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( mendelianViolation == null ) { if (checkAndSetSamples(((Walker) walker).getSampleDB())) { mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java index 31067e386..ef0c8ab4f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java @@ -2,11 +2,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -23,60 +25,39 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MQRankSum", 1, VCFHeaderLineType.Float, "Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities")); } - protected void fillQualsFromPileup(byte ref, List alts, ReadBackedPileup pileup, List refQuals, List altQuals) { - for ( final PileupElement p : pileup ) { - if ( isUsableBase(p) ) { - if ( p.getBase() == ref ) { - refQuals.add((double)p.getMappingQual()); - } else if ( alts.contains(p.getBase()) ) { - altQuals.add((double)p.getMappingQual()); - } - } - } - } + protected void fillQualsFromPileup(final List allAlleles, + final int refLoc, + final ReadBackedPileup pileup, + final PerReadAlleleLikelihoodMap likelihoodMap, + final List refQuals, final List altQuals) { - protected void fillQualsFromPileup(final Allele ref, final List alts, final int refLoc, final Map> stratifiedContext, final List refQuals, final List altQuals) { - for ( final Map.Entry> alleleBin : stratifiedContext.entrySet() ) { - final boolean matchesRef = ref.equals(alleleBin.getKey()); - final boolean matchesAlt = alts.contains(alleleBin.getKey()); - if ( !matchesRef && !matchesAlt ) - continue; - - for ( final GATKSAMRecord read : alleleBin.getValue() ) { - if ( matchesRef ) - refQuals.add((double)read.getMappingQuality()); - else - altQuals.add((double)read.getMappingQuality()); - } - } - } - - protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals) { - // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele ? - HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - for (final PileupElement p: pileup) { - if (indelLikelihoodMap.containsKey(p) && p.getMappingQual() != 0 && p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) { - // retrieve likelihood information corresponding to this read - LinkedHashMap el = indelLikelihoodMap.get(p); - // by design, first element in LinkedHashMap was ref allele - double refLikelihood=0.0, altLikelihood=Double.NEGATIVE_INFINITY; - - for (Map.Entry a : el.entrySet()) { - - if (a.getKey().isReference()) - refLikelihood = a.getValue(); - else { - double like = a.getValue(); - if (like >= altLikelihood) - altLikelihood = like; + if (pileup != null && likelihoodMap == null) { + // no per-read likelihoods available: + for ( final PileupElement p : pileup ) { + if ( isUsableBase(p) ) { + if ( allAlleles.get(0).equals(Allele.create(p.getBase())) ) { + refQuals.add((double)p.getMappingQual()); + } else if ( allAlleles.contains(Allele.create(p.getBase()))) { + altQuals.add((double)p.getMappingQual()); } } - if (refLikelihood > altLikelihood + INDEL_LIKELIHOOD_THRESH) - refQuals.add((double)p.getMappingQual()); - else if (altLikelihood > refLikelihood + INDEL_LIKELIHOOD_THRESH) - altQuals.add((double)p.getMappingQual()); } + return; + } + for (Map.Entry> el : likelihoodMap.getLikelihoodReadMap().entrySet()) { + if (!isUsableBase(el.getKey())) + continue; + + final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); + if (a.isNoCall()) + continue; // read is non-informative + if (a.isReference()) + refQuals.add((double)el.getKey().getMappingQual()); + else if (allAlleles.contains(a)) + altQuals.add((double)el.getKey().getMappingQual()); + + } } - -} \ No newline at end of file + + } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 372d5bc9e..c3cb01c23 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFStandardHeaderLines; @@ -24,7 +25,12 @@ import java.util.Map; */ public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java index 9f542e3bd..21ee66ea2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -22,7 +23,12 @@ import java.util.Map; */ public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java index ba4303b4a..8e4edaf0e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -20,7 +21,12 @@ import java.util.Map; * The number of N bases, counting only SOLiD data */ public class NBaseCount extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index b62cd374b..f94d51bc8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -28,14 +29,24 @@ import java.util.Map; */ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( !vc.hasLog10PError() || stratifiedContexts.size() == 0 ) + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map perReadAlleleLikelihoodMap ) { + if ( !vc.hasLog10PError() ) return null; final GenotypesContext genotypes = vc.getGenotypes(); if ( genotypes == null || genotypes.size() == 0 ) return null; + if (stratifiedContexts != null && stratifiedContexts.size() == 0) + return null; + if (perReadAlleleLikelihoodMap != null && perReadAlleleLikelihoodMap.size() == 0) + return null; + int depth = 0; for ( final Genotype genotype : genotypes ) { @@ -44,11 +55,20 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati if ( !genotype.isHet() && !genotype.isHomVar() ) continue; - AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); - if ( context == null ) - continue; + if (stratifiedContexts!= null) { + AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); + if ( context == null ) + continue; + depth += context.getBasePileup().depthOfCoverage(); - depth += context.getBasePileup().depthOfCoverage(); + } + else if (perReadAlleleLikelihoodMap != null) { + PerReadAlleleLikelihoodMap perReadAlleleLikelihoods = perReadAlleleLikelihoodMap.get(genotype.getSampleName()); + if (perReadAlleleLikelihoods == null || perReadAlleleLikelihoods.isEmpty()) + continue; + + depth += perReadAlleleLikelihoods.getLikelihoodReadMap().size(); + } } if ( depth == 0 ) @@ -67,39 +87,5 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Variant Confidence/Quality by Depth")); } - public Map annotate(Map>> stratifiedContexts, VariantContext vc) { - if ( stratifiedContexts.size() == 0 ) - return null; - - final GenotypesContext genotypes = vc.getGenotypes(); - if ( genotypes == null || genotypes.size() == 0 ) - return null; - - int depth = 0; - - for ( final Genotype genotype : genotypes ) { - - // we care only about variant calls with likelihoods - if ( !genotype.isHet() && !genotype.isHomVar() ) - continue; - - final Map> alleleBins = stratifiedContexts.get(genotype.getSampleName()); - if ( alleleBins == null ) - continue; - - for ( final Map.Entry> alleleBin : alleleBins.entrySet() ) { - depth += alleleBin.getValue().size(); - } - } - - if ( depth == 0 ) - return null; - - double QD = -10.0 * vc.getLog10PError() / (double)depth; - - Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%.2f", QD)); - return map; - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 842fde8ad..21b91b4b2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; @@ -18,10 +19,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** @@ -29,25 +27,48 @@ import java.util.Map; */ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( stratifiedContexts.size() == 0 ) - return null; + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map perReadAlleleLikelihoodMap ) { + int totalSize = 0, index = 0; + int qualities[]; + if (stratifiedContexts != null) { + if ( stratifiedContexts.size() == 0 ) + return null; - int totalSize = 0; - for ( AlignmentContext context : stratifiedContexts.values() ) - totalSize += context.size(); + for ( AlignmentContext context : stratifiedContexts.values() ) + totalSize += context.size(); - final int[] qualities = new int[totalSize]; - int index = 0; + qualities = new int[totalSize]; - for ( Map.Entry sample : stratifiedContexts.entrySet() ) { - AlignmentContext context = sample.getValue(); - final ReadBackedPileup pileup = context.getBasePileup(); - for (PileupElement p : pileup ) { - if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE ) - qualities[index++] = p.getMappingQual(); + for ( Map.Entry sample : stratifiedContexts.entrySet() ) { + AlignmentContext context = sample.getValue(); + for (PileupElement p : context.getBasePileup() ) + index = fillMappingQualitiesFromPileupAndUpdateIndex(p, index, qualities); } } + else if (perReadAlleleLikelihoodMap != null) { + if ( perReadAlleleLikelihoodMap.size() == 0 ) + return null; + + for ( PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() ) + totalSize += perReadLikelihoods.size(); + + qualities = new int[totalSize]; + for ( PerReadAlleleLikelihoodMap perReadLikelihoods : perReadAlleleLikelihoodMap.values() ) { + for (PileupElement p : perReadLikelihoods.getStoredPileupElements()) + index = fillMappingQualitiesFromPileupAndUpdateIndex(p, index, qualities); + + + } + } + else + return null; + + double rms = MathUtils.rms(qualities); Map map = new HashMap(); @@ -55,32 +76,12 @@ public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAn return map; } - public Map annotate(Map>> stratifiedContexts, VariantContext vc) { - if ( stratifiedContexts.size() == 0 ) - return null; + private static int fillMappingQualitiesFromPileupAndUpdateIndex(final PileupElement p, final int inputIdx, final int[] qualities) { + int outputIdx = inputIdx; + if ( p.getMappingQual() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE ) + qualities[outputIdx++] = p.getMappingQual(); - int depth = 0; - for ( final Map> alleleBins : stratifiedContexts.values() ) { - for ( final Map.Entry> alleleBin : alleleBins.entrySet() ) { - depth += alleleBin.getValue().size(); - } - } - - final int[] qualities = new int[depth]; - int index = 0; - - for ( final Map> alleleBins : stratifiedContexts.values() ) { - for ( final List reads : alleleBins.values() ) { - for ( final GATKSAMRecord read : reads ) { - if ( read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE ) - qualities[index++] = read.getMappingQuality(); - } - } - } - - final Map map = new HashMap(); - map.put(getKeyNames().get(0), String.format("%.2f", MathUtils.rms(qualities))); - return map; + return outputIdx; } public List getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java index bf6adcfac..474625fff 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MannWhitneyU; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -28,12 +29,15 @@ import java.util.Map; * Abstract root for all RankSum based annotations */ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation { - static final double INDEL_LIKELIHOOD_THRESH = 0.1; static final boolean DEBUG = false; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if (stratifiedContexts.size() == 0) - return null; + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { + // either stratifiedContexts or stratifiedPerReadAlleleLikelihoodMap has to be non-null final GenotypesContext genotypes = vc.getGenotypes(); if (genotypes == null || genotypes.size() == 0) @@ -42,39 +46,24 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR final ArrayList refQuals = new ArrayList(); final ArrayList altQuals = new ArrayList(); - if ( vc.isSNP() ) { - final List altAlleles = new ArrayList(); - for ( final Allele a : vc.getAlternateAlleles() ) - altAlleles.add(a.getBases()[0]); - - for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { + for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { + PerReadAlleleLikelihoodMap indelLikelihoodMap = null; + ReadBackedPileup pileup = null; + if (stratifiedPerReadAlleleLikelihoodMap != null && !stratifiedPerReadAlleleLikelihoodMap.isEmpty()) { + indelLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName()); + if (indelLikelihoodMap == null) + continue; + if (indelLikelihoodMap.isEmpty()) + continue; + } + else if (stratifiedContexts != null) { final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); if ( context == null ) continue; - - fillQualsFromPileup(ref.getBase(), altAlleles, context.getBasePileup(), refQuals, altQuals); + pileup = context.getBasePileup(); } - } else if ( vc.isIndel() || vc.isMixed() ) { - - for (final Genotype genotype : genotypes.iterateInSampleNameOrder()) { - final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); - if (context == null) { - continue; - } - - final ReadBackedPileup pileup = context.getBasePileup(); - if (pileup == null) - continue; - - if (IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap() == null || - IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap().size() == 0) - return null; - - fillIndelQualsFromPileup(pileup, refQuals, altQuals); - } - } else - return null; - + fillQualsFromPileup(vc.getAlleles(), vc.getStart(), pileup, indelLikelihoodMap, refQuals, altQuals ); + } final MannWhitneyU mannWhitneyU = new MannWhitneyU(); for (final Double qual : altQuals) { mannWhitneyU.add(qual, MannWhitneyU.USet.SET1); @@ -103,50 +92,12 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR return map; } - public Map annotate(Map>> stratifiedContexts, VariantContext vc) { - if (stratifiedContexts.size() == 0) - return null; - - final GenotypesContext genotypes = vc.getGenotypes(); - if (genotypes == null || genotypes.size() == 0) - return null; - - final ArrayList refQuals = new ArrayList(); - final ArrayList altQuals = new ArrayList(); - - for ( final Genotype genotype : genotypes.iterateInSampleNameOrder() ) { - final Map> context = stratifiedContexts.get(genotype.getSampleName()); - if ( context == null ) - continue; - - fillQualsFromPileup(vc.getReference(), vc.getAlternateAlleles(), vc.getStart(), context, refQuals, altQuals); - } - - if ( refQuals.size() == 0 || altQuals.size() == 0 ) - return null; - - final MannWhitneyU mannWhitneyU = new MannWhitneyU(); - for (final Double qual : altQuals) { - mannWhitneyU.add(qual, MannWhitneyU.USet.SET1); - } - for (final Double qual : refQuals) { - mannWhitneyU.add(qual, MannWhitneyU.USet.SET2); - } - - // we are testing that set1 (the alt bases) have lower quality scores than set2 (the ref bases) - final Pair testResults = mannWhitneyU.runOneSidedTest(MannWhitneyU.USet.SET1); - - final Map map = new HashMap(); - if (!Double.isNaN(testResults.first)) - map.put(getKeyNames().get(0), String.format("%.3f", testResults.first)); - return map; - } - - protected abstract void fillQualsFromPileup(final Allele ref, final List alts, final int refLoc, final Map> stratifiedContext, final List refQuals, List altQuals); - - protected abstract void fillQualsFromPileup(final byte ref, final List alts, final ReadBackedPileup pileup, final List refQuals, final List altQuals); - - protected abstract void fillIndelQualsFromPileup(final ReadBackedPileup pileup, final List refQuals, final List altQuals); + protected abstract void fillQualsFromPileup(final List alleles, + final int refLoc, + final ReadBackedPileup readBackedPileup, + final PerReadAlleleLikelihoodMap alleleLikelihoodMap, + final List refQuals, + final List altQuals); /** * Can the base in this pileup element be used in comparative tests between ref / alt bases? diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java index 3456041c7..a1b8bcfc8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java @@ -6,6 +6,7 @@ import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -32,98 +33,55 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio return Arrays.asList(new VCFInfoHeaderLine("ReadPosRankSum", 1, VCFHeaderLineType.Float, "Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias")); } - protected void fillQualsFromPileup(byte ref, List alts, ReadBackedPileup pileup, List refQuals, List altQuals) { - for (final PileupElement p : pileup) { - if (isUsableBase(p)) { - int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0); - final int numAlignedBases = AlignmentUtils.getNumAlignedBases(p.getRead()); - if (readPos > numAlignedBases / 2) - readPos = numAlignedBases - (readPos + 1); + protected void fillQualsFromPileup(final List allAlleles, + final int refLoc, + final ReadBackedPileup pileup, + final PerReadAlleleLikelihoodMap alleleLikelihoodMap, + final List refQuals, final List altQuals) { + if (alleleLikelihoodMap == null) { + // use fast SNP-based version if we don't have per-read allele likelihoods + for ( final PileupElement p : pileup ) { + if ( isUsableBase(p) ) { + int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0); - if ( p.getBase() == ref ) - refQuals.add((double) readPos); - else if ( alts.contains(p.getBase()) ) - altQuals.add((double) readPos); - } - } - } + readPos = getFinalReadPosition(p.getRead(),readPos); - protected void fillQualsFromPileup(final Allele ref, final List alts, final int refLoc, final Map> stratifiedContext, final List refQuals, final List altQuals) { - for ( final Map.Entry> alleleBin : stratifiedContext.entrySet() ) { - final boolean matchesRef = ref.equals(alleleBin.getKey()); - final boolean matchesAlt = alts.contains(alleleBin.getKey()); - if ( !matchesRef && !matchesAlt ) - continue; - - for ( final GATKSAMRecord read : alleleBin.getValue() ) { - final int offset = ReadUtils.getReadCoordinateForReferenceCoordinate( read.getSoftStart(), read.getCigar(), refLoc, ReadUtils.ClippingTail.RIGHT_TAIL, true ); - if ( offset == ReadUtils.CLIPPING_GOAL_NOT_REACHED ) - continue; - int readPos = AlignmentUtils.calcAlignmentByteArrayOffset( read.getCigar(), offset, false, false, 0, 0 ); - - final int numAlignedBases = AlignmentUtils.getNumAlignedBasesCountingSoftClips( read ); - if (readPos > numAlignedBases / 2) - readPos = numAlignedBases - (readPos + 1); - - if ( matchesRef ) - refQuals.add((double) readPos); - else - altQuals.add((double) readPos); - } - } - } - - protected void fillIndelQualsFromPileup(ReadBackedPileup pileup, List refQuals, List altQuals) { - // equivalent is whether indel likelihoods for reads corresponding to ref allele are more likely than reads corresponding to alt allele - // to classify a pileup element as ref or alt, we look at the likelihood associated with the allele associated to this element. - // A pileup element then has a list of pairs of form (Allele, likelihood of this allele). - // To classify a pileup element as Ref or Alt, we look at the likelihood of corresponding alleles. - // If likelihood of ref allele > highest likelihood of all alt alleles + epsilon, then this pielup element is "ref" - // otherwise if highest alt allele likelihood is > ref likelihood + epsilon, then this pileup element it "alt" - final HashMap> indelLikelihoodMap = IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - for (final PileupElement p : pileup) { - if (indelLikelihoodMap.containsKey(p)) { - LinkedHashMap el = indelLikelihoodMap.get(p); // retrieve likelihood information corresponding to this read - double refLikelihood = 0.0, altLikelihood = Double.NEGATIVE_INFINITY; // by design, first element in LinkedHashMap was ref allele - - for (Map.Entry a : el.entrySet()) { - if (a.getKey().isReference()) - refLikelihood = a.getValue(); - else { - double like = a.getValue(); - if (like >= altLikelihood) - altLikelihood = like; + if ( allAlleles.get(0).equals(Allele.create(p.getBase())) ) { + refQuals.add((double)readPos); + } else if ( allAlleles.contains(Allele.create(p.getBase()))) { + altQuals.add((double)readPos); } } - - int readPos = getOffsetFromClippedReadStart(p.getRead(), p.getOffset()); - final int numAlignedBases = getNumAlignedBases(p.getRead()); - - if (readPos > numAlignedBases / 2) { - readPos = numAlignedBases - (readPos + 1); - } - //if (DEBUG) System.out.format("R:%s start:%d C:%s offset:%d rp:%d readPos:%d alignedB:%d\n",p.getRead().getReadName(),p.getRead().getAlignmentStart(),p.getRead().getCigarString(),p.getOffset(), rp, readPos, numAlignedBases); - - - // if event is beyond span of read just return and don't consider this element. This can happen, for example, with reads - // where soft clipping still left strings of low quality bases but these are later removed by indel-specific clipping. - // if (readPos < -1) - // return; - if (refLikelihood > (altLikelihood + INDEL_LIKELIHOOD_THRESH)) { - refQuals.add((double) readPos); - //if (DEBUG) System.out.format("REF like: %4.1f, pos: %d\n",refLikelihood,readPos); - } else if (altLikelihood > (refLikelihood + INDEL_LIKELIHOOD_THRESH)) { - altQuals.add((double) readPos); - //if (DEBUG) System.out.format("ALT like: %4.1f, pos: %d\n",refLikelihood,readPos); - - } - - } + return; + } + + for (Map.Entry> el : alleleLikelihoodMap.getLikelihoodReadMap().entrySet()) { + int readPos = getOffsetFromClippedReadStart(el.getKey().getRead(), el.getKey().getOffset()); + readPos = getFinalReadPosition(el.getKey().getRead(),readPos); + + final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue()); + if (a.isNoCall()) + continue; // read is non-informative + if (a.isReference()) + refQuals.add((double)readPos); + else if (allAlleles.contains(a)) + altQuals.add((double)readPos); + } } + int getFinalReadPosition(GATKSAMRecord read, int initialReadPosition) { + final int numAlignedBases = getNumAlignedBases(read); + + int readPos = initialReadPosition; + if (initialReadPosition > numAlignedBases / 2) { + readPos = numAlignedBases - (initialReadPosition + 1); + } + return readPos; + + } int getNumClippedBasesAtStart(SAMRecord read) { // compute total number of clipped bases (soft or hard clipped) // check for hard clips (never consider these bases): diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java index 7e4d44cf2..abe55de5a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -46,7 +47,12 @@ import java.util.Map; */ public class SampleList extends InfoFieldAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( vc.isMonomorphicInSamples() || !vc.hasGenotypes() ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 4d990e738..f0bd7ecd9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -225,7 +226,12 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); } - public Map annotate ( RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { RodBinding snpEffRodBinding = walker.getSnpEffRodBinding(); // Get only SnpEff records that start at this locus, not merely span it: diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java index af2df8e6a..f6bb4e747 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -22,7 +23,12 @@ import java.util.Map; */ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation { - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java index eced387b3..439402e2f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; @@ -47,7 +48,12 @@ public class TandemRepeatAnnotator extends InfoFieldAnnotation implements Standa private static final String STR_PRESENT = "STR"; private static final String REPEAT_UNIT_KEY = "RU"; private static final String REPEATS_PER_ALLELE_KEY = "RPA"; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( !vc.isIndel()) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java index 63694d809..43ef188a8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java @@ -7,6 +7,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -28,7 +29,12 @@ public class TechnologyComposition extends InfoFieldAnnotation implements Experi private String n454 ="Num454"; private String nSolid = "NumSOLiD"; private String nOther = "NumOther"; - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( stratifiedContexts.size() == 0 ) return null; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index 2e3578dcb..c3e98c20f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.RodRequiringAnnotation; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; @@ -28,7 +29,12 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen private Set trios = null; private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information - public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { if ( trios == null ) { if ( walker instanceof VariantAnnotator ) { trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 073faf54e..f30fb4109 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -178,7 +179,18 @@ public class VariantAnnotatorEngine { this.requireStrictAlleleMatch = requireStrictAlleleMatch; } - public VariantContext annotateContext(final RefMetaDataTracker tracker, final ReferenceContext ref, final Map stratifiedContexts, VariantContext vc) { + public VariantContext annotateContext(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final Map stratifiedContexts, + VariantContext vc) { + return annotateContext(tracker, ref, stratifiedContexts, vc, null); + } + + public VariantContext annotateContext(final RefMetaDataTracker tracker, + final ReferenceContext ref, + final Map stratifiedContexts, + VariantContext vc, + final Map perReadAlleleLikelihoodMap) { Map infoAnnotations = new LinkedHashMap(vc.getAttributes()); // annotate db occurrences @@ -189,7 +201,7 @@ public class VariantAnnotatorEngine { // go through all the requested info annotationTypes for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) { - Map annotationsFromCurrentType = annotationType.annotate(tracker, walker, ref, stratifiedContexts, vc); + Map annotationsFromCurrentType = annotationType.annotate(tracker, walker, ref, stratifiedContexts, vc, perReadAlleleLikelihoodMap); if ( annotationsFromCurrentType != null ) infoAnnotations.putAll(annotationsFromCurrentType); } @@ -201,7 +213,7 @@ public class VariantAnnotatorEngine { return builder.genotypes(annotateGenotypes(tracker, ref, stratifiedContexts, vc)).make(); } - public VariantContext annotateContext(final Map>> stratifiedContexts, VariantContext vc) { + public VariantContext annotateContext(final Map stratifiedContexts, VariantContext vc) { Map infoAnnotations = new LinkedHashMap(vc.getAttributes()); // go through all the requested info annotationTypes diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java index de61c7741..7af4baddb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -10,8 +11,8 @@ import java.util.Map; // TODO -- make this an abstract class when we move away from InfoFieldAnnotation public interface ActiveRegionBasedAnnotation extends AnnotationType { - // return annotations for the given contexts split by sample and then allele - public abstract Map annotate(final Map>> stratifiedContexts, final VariantContext vc); + // return annotations for the given contexts split by sample and then read likelihoof + public abstract Map annotate(final Map stratifiedContexts, final VariantContext vc); // return the descriptions used for the VCF INFO meta field public abstract List getDescriptions(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java index 1569a605f..738be9883 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -11,8 +12,25 @@ import java.util.Map; public abstract class InfoFieldAnnotation extends VariantAnnotatorAnnotation { // return annotations for the given contexts split by sample - public abstract Map annotate(RefMetaDataTracker tracker, AnnotatorCompatible walker, - ReferenceContext ref, Map stratifiedContexts, VariantContext vc); + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc) { + return annotate(tracker, walker, ref, stratifiedContexts, vc, null); + } + + public Map annotate(Map perReadAlleleLikelihoodMap, VariantContext vc) { + return annotate(null, null, null, null, vc, perReadAlleleLikelihoodMap); + } + + + public abstract Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap); // return the descriptions used for the VCF INFO meta field public abstract List getDescriptions(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 6fdc926d5..77da35c41 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -103,7 +103,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { final AlignmentContextUtils.ReadOrientation contextType, final List allAllelesToUse, final boolean useBAQedPileup, - final GenomeLocParser locParser); + final GenomeLocParser locParser, + final Map perReadAlleleLikelihoodMap); protected int getFilteredDepth(ReadBackedPileup pileup) { @@ -115,4 +116,5 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { return count; } + } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index bedffa690..ebfbc49fe 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -48,24 +48,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private boolean ignoreSNPAllelesWhenGenotypingIndels = false; private PairHMMIndelErrorModel pairModel; - private static ThreadLocal>> indelLikelihoodMap = - new ThreadLocal>>() { - protected synchronized HashMap> initialValue() { - return new HashMap>(); - } - }; private LinkedHashMap haplotypeMap; - // gdebug removeme - // todo -cleanup - private GenomeLoc lastSiteVisited; private List alleleList = new ArrayList(); - static { - indelLikelihoodMap.set(new HashMap>()); - } - protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); @@ -93,16 +80,15 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood final AlignmentContextUtils.ReadOrientation contextType, final List allAllelesToUse, final boolean useBAQedPileup, - final GenomeLocParser locParser) { + final GenomeLocParser locParser, + final Map perReadAlleleLikelihoodMap) { GenomeLoc loc = ref.getLocus(); // if (!ref.getLocus().equals(lastSiteVisited)) { if (contextType == AlignmentContextUtils.ReadOrientation.COMPLETE) { // starting a new site: clear allele list - lastSiteVisited = ref.getLocus(); - indelLikelihoodMap.set(new HashMap>()); haplotypeMap.clear(); - + perReadAlleleLikelihoodMap.clear(); // clean mapping sample-> per read, per allele likelihoods alleleList = getInitialAlleleList(tracker, ref, contexts, contextType, locParser, UAC, ignoreSNPAllelesWhenGenotypingIndels); if (alleleList.isEmpty()) return null; @@ -130,10 +116,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood for (Map.Entry sample : contexts.entrySet()) { AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); + if (!perReadAlleleLikelihoodMap.containsKey(sample.getKey())){ + // no likelihoods have been computed for this sample at this site + perReadAlleleLikelihoodMap.put(sample.getKey(), new PerReadAlleleLikelihoodMap()); + } final ReadBackedPileup pileup = context.getBasePileup(); if (pileup != null) { final GenotypeBuilder b = new GenotypeBuilder(sample.getKey()); - final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap()); + final double[] genotypeLikelihoods = pairModel.computeDiploidReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap.get(sample.getKey())); b.PL(genotypeLikelihoods); b.DP(getFilteredDepth(pileup)); genotypes.add(b.make()); @@ -150,10 +140,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood return builder.genotypes(genotypes).make(); } - public static HashMap> getIndelLikelihoodMap() { - return indelLikelihoodMap.get(); - } - public static void getHaplotypeMapFromAlleles(final List alleleList, final ReferenceContext ref, final GenomeLoc loc, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java new file mode 100644 index 000000000..a704afba9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PerReadAlleleLikelihoodMap.java @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2011 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +package org.broadinstitute.sting.gatk.walkers.genotyper; + + +//import org.broadinstitute.sting.gatk.walkers.Requires; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.variantcontext.Allele; + +import java.util.*; + +public class PerReadAlleleLikelihoodMap { + public static final double INDEL_LIKELIHOOD_THRESH = 0.1; + + private List alleles; + private Map> likelihoodReadMap; + public PerReadAlleleLikelihoodMap() { + likelihoodReadMap = new LinkedHashMap>(); + alleles = new ArrayList(); + } + + public void add(PileupElement p, Allele a, Double likelihood) { + Map likelihoodMap; + if (likelihoodReadMap.containsKey(p)){ + // seen pileup element before + likelihoodMap = likelihoodReadMap.get(p); + } + else { + likelihoodMap = new HashMap(); + likelihoodReadMap.put(p,likelihoodMap); + } + likelihoodMap.put(a,likelihood); + + if (!alleles.contains(a)) + alleles.add(a); + + } + + public int size() { + return likelihoodReadMap.size(); + } + + public void add(GATKSAMRecord read, Allele a, Double likelihood) { + PileupElement p = new PileupElement(read,-1,false,false,false,false,false,false); + add(p,a,likelihood); + } + + public boolean containsPileupElement(PileupElement p) { + return likelihoodReadMap.containsKey(p); + } + + public boolean isEmpty() { + return likelihoodReadMap.isEmpty(); + } + + public Map> getLikelihoodReadMap() { + return likelihoodReadMap; + } + public void clear() { + alleles.clear(); + likelihoodReadMap.clear(); + } + + public Set getStoredPileupElements() { + return likelihoodReadMap.keySet(); + } + /** + * Returns list of reads greedily associated with a particular allele. + * Needs to loop for each read, and assign to each allele + * @param a Desired allele + * @return + */ + @Requires("a!=null") + public List getReadsAssociatedWithAllele(Allele a) { + return null; + } + + public Map getLikelihoodsAssociatedWithPileupElement(PileupElement p) { + if (!likelihoodReadMap.containsKey(p)) + return null; + + return likelihoodReadMap.get(p); + } + + public static Allele getMostLikelyAllele(Map alleleMap) { + double minLike = Double.POSITIVE_INFINITY, maxLike = Double.NEGATIVE_INFINITY; + Allele mostLikelyAllele = Allele.NO_CALL; + + for (Map.Entry el : alleleMap.entrySet()) { + if (el.getValue() > maxLike) { + maxLike = el.getValue(); + mostLikelyAllele = el.getKey(); + } + + if (el.getValue() < minLike) + minLike = el.getValue(); + + } + if (maxLike-minLike > INDEL_LIKELIHOOD_THRESH) + return mostLikelyAllele; + else + return Allele.NO_CALL; + } + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 07d5d2f2d..76ba72017 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -62,7 +62,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC final AlignmentContextUtils.ReadOrientation contextType, final List allAllelesToUse, final boolean useBAQedPileup, - final GenomeLocParser locParser) { + final GenomeLocParser locParser, + final Map perReadAlleleLikelihoodMap) { + + perReadAlleleLikelihoodMap.clear(); // not used in SNP model, sanity check to delete any older data final byte refBase = ref.getBase(); final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index f15fa9b99..0b218865c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -177,19 +177,23 @@ public class UnifiedGenotyperEngine { final List results = new ArrayList(2); final List models = getGLModelsToUse(tracker, refContext, rawContext); + + final Map perReadAlleleLikelihoodMap = new HashMap(); + if ( models.isEmpty() ) { results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, null, rawContext) : null); } else { for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) { + perReadAlleleLikelihoodMap.clear(); final Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); if ( stratifiedContexts == null ) { results.add(UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null); } else { - final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); + final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); if ( vc != null ) - results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true)); + results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap)); } } } @@ -219,9 +223,13 @@ public class UnifiedGenotyperEngine { * @param tracker the meta data tracker * @param refContext the reference base * @param rawContext contextual information around the locus + * @param perReadAlleleLikelihoodMap Map to store per-sample, per-read, per-allele likelihoods (only used for indels) * @return the VariantContext object */ - public VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { + public VariantContext calculateLikelihoods(final RefMetaDataTracker tracker, + final ReferenceContext refContext, + final AlignmentContext rawContext, + final Map perReadAlleleLikelihoodMap) { final List models = getGLModelsToUse(tracker, refContext, rawContext); if ( models.isEmpty() ) { return null; @@ -231,7 +239,7 @@ public class UnifiedGenotyperEngine { final Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); // return the first valid one we encounter if ( stratifiedContexts != null ) - return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); + return calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); } @@ -247,7 +255,11 @@ public class UnifiedGenotyperEngine { * @param vc the GL-annotated variant context * @return the VariantCallContext object */ - public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, VariantContext vc) { + public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, + final ReferenceContext refContext, + final AlignmentContext rawContext, + final VariantContext vc, + final Map perReadAlleleLikelihoodMap) { final List models = getGLModelsToUse(tracker, refContext, rawContext); if ( models.isEmpty() ) { return null; @@ -256,25 +268,37 @@ public class UnifiedGenotyperEngine { // return the first one final GenotypeLikelihoodsCalculationModel.Model model = models.get(0); final Map stratifiedContexts = getFilteredAndStratifiedContexts(UAC, refContext, rawContext, model); - return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model); + return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, perReadAlleleLikelihoodMap); } - - // --------------------------------------------------------------------------------------------------------- + public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, + final ReferenceContext refContext, + final AlignmentContext rawContext, + final VariantContext vc) { + return calculateGenotypes(tracker, refContext, rawContext, vc, null); + } + // --------------------------------------------------------------------------------------------------------- // // Private implementation helpers // // --------------------------------------------------------------------------------------------------------- // private method called by both UnifiedGenotyper and UGCalcLikelihoods entry points into the engine - private VariantContext calculateLikelihoods(RefMetaDataTracker tracker, ReferenceContext refContext, Map stratifiedContexts, AlignmentContextUtils.ReadOrientation type, List alternateAllelesToUse, boolean useBAQedPileup, final GenotypeLikelihoodsCalculationModel.Model model) { + private VariantContext calculateLikelihoods(final RefMetaDataTracker tracker, + final ReferenceContext refContext, + final Map stratifiedContexts, + final AlignmentContextUtils.ReadOrientation type, + final List alternateAllelesToUse, + final boolean useBAQedPileup, + final GenotypeLikelihoodsCalculationModel.Model model, + final Map perReadAlleleLikelihoodMap) { // initialize the data for this thread if that hasn't been done yet if ( glcm.get() == null ) { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser); + return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { @@ -305,12 +329,22 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vc, false); } - public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { - return calculateGenotypes(null, null, null, null, vc, model); + public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, final Map perReadAlleleLikelihoodMap) { + return calculateGenotypes(null, null, null, null, vc, model, perReadAlleleLikelihoodMap); } - public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { - return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false); + public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { + return calculateGenotypes(null, null, null, null, vc, model, null); + } + + public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, + final ReferenceContext refContext, + final AlignmentContext rawContext, + final Map stratifiedContexts, + final VariantContext vc, + final GenotypeLikelihoodsCalculationModel.Model model, + final Map perReadAlleleLikelihoodMap) { + return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false,perReadAlleleLikelihoodMap); } /** @@ -324,8 +358,11 @@ public class UnifiedGenotyperEngine { * @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc * @return VC with assigned genotypes */ - public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, - final boolean inheritAttributesFromInputVC) { + public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext, + final AlignmentContext rawContext, Map stratifiedContexts, + final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model, + final boolean inheritAttributesFromInputVC, + final Map perReadAlleleLikelihoodMap) { boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null; @@ -451,7 +488,7 @@ public class UnifiedGenotyperEngine { List allAllelesToUse = builder.make().getAlleles(); // the forward lod - VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model); + VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); AFresult.reset(); afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model), AFresult); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); @@ -460,7 +497,7 @@ public class UnifiedGenotyperEngine { //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod - VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model); + VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap); AFresult.reset(); afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model), AFresult); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true); @@ -496,7 +533,7 @@ public class UnifiedGenotyperEngine { final ReadBackedPileup pileup = rawContext.getBasePileup(); stratifiedContexts = AlignmentContextUtils.splitContextBySampleName(pileup); - vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall); + vcCall = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vcCall, perReadAlleleLikelihoodMap); } return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 65c5a2fbc..9234a9fe8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.indels; import com.google.java.contract.Ensures; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.PairHMM; @@ -40,6 +41,7 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.Map; public class PairHMMIndelErrorModel { @@ -167,11 +169,15 @@ public class PairHMMIndelErrorModel { } - public synchronized double[] computeDiploidReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap haplotypeMap, ReferenceContext ref, int eventLength, HashMap> indelLikelihoodMap){ + public synchronized double[] computeDiploidReadHaplotypeLikelihoods(final ReadBackedPileup pileup, + final LinkedHashMap haplotypeMap, + final ReferenceContext ref, + final int eventLength, + final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap){ final int numHaplotypes = haplotypeMap.size(); final int readCounts[] = new int[pileup.getNumberOfElements()]; - final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, indelLikelihoodMap, readCounts); + final double[][] readLikelihoods = computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, ref, eventLength, perReadAlleleLikelihoodMap, readCounts); return getDiploidHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods); } @@ -181,7 +187,7 @@ public class PairHMMIndelErrorModel { final LinkedHashMap haplotypeMap, final ReferenceContext ref, final int eventLength, - final HashMap> indelLikelihoodMap, + final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final int[] readCounts) { final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][haplotypeMap.size()]; final PairHMM pairHMM = new PairHMM(bandedLikelihoods); @@ -192,8 +198,8 @@ public class PairHMMIndelErrorModel { readCounts[readIdx] = p.getRepresentativeCount(); // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location) - if (indelLikelihoodMap.containsKey(p)) { - HashMap el = indelLikelihoodMap.get(p); + if (perReadAlleleLikelihoodMap.containsPileupElement(p)) { + Map el = perReadAlleleLikelihoodMap.getLikelihoodsAssociatedWithPileupElement(p); int j=0; for (Allele a: haplotypeMap.keySet()) { readLikelihoods[readIdx][j++] = el.get(a); @@ -201,7 +207,7 @@ public class PairHMMIndelErrorModel { } else { final int refWindowStart = ref.getWindow().getStart(); - final int refWindowStop = ref.getWindow().getStop(); + final int refWindowStop = ref.getWindow().getStop(); if (DEBUG) { System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString()); @@ -280,7 +286,7 @@ public class PairHMMIndelErrorModel { System.out.format("numStartSoftClippedBases: %d numEndSoftClippedBases: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d\n", numStartSoftClippedBases, numEndSoftClippedBases, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength()); - LinkedHashMap readEl = new LinkedHashMap(); + // LinkedHashMap readEl = new LinkedHashMap(); /** * Check if we'll end up with an empty read once all clipping is done @@ -288,7 +294,7 @@ public class PairHMMIndelErrorModel { if (numStartSoftClippedBases + numEndSoftClippedBases >= unclippedReadBases.length) { int j=0; for (Allele a: haplotypeMap.keySet()) { - readEl.put(a,0.0); + perReadAlleleLikelihoodMap.add(p,a,0.0); readLikelihoods[readIdx][j++] = 0.0; } } @@ -329,45 +335,45 @@ public class PairHMMIndelErrorModel { - final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), - (int)indStart, (int)indStop); + final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), + (int)indStart, (int)indStop); - final int X_METRIC_LENGTH = readBases.length+2; - final int Y_METRIC_LENGTH = haplotypeBases.length+2; + final int X_METRIC_LENGTH = readBases.length+2; + final int Y_METRIC_LENGTH = haplotypeBases.length+2; - if (matchMetricArray == null) { - //no need to reallocate arrays for each new haplotype, as length won't change - matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; - XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; - YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + if (matchMetricArray == null) { + //no need to reallocate arrays for each new haplotype, as length won't change + matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; - PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH); - } + PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH); + } - int startIndexInHaplotype = 0; - if (previousHaplotypeSeen != null) - startIndexInHaplotype = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen); - previousHaplotypeSeen = haplotypeBases.clone(); + int startIndexInHaplotype = 0; + if (previousHaplotypeSeen != null) + startIndexInHaplotype = computeFirstDifferingPosition(haplotypeBases, previousHaplotypeSeen); + previousHaplotypeSeen = haplotypeBases.clone(); - readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals, - (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities), - (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities), - contextLogGapContinuationProbabilities, - startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray); + readLikelihood = pairHMM.computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals, + (read.hasBaseIndelQualities() ? read.getBaseInsertionQualities() : contextLogGapOpenProbabilities), + (read.hasBaseIndelQualities() ? read.getBaseDeletionQualities() : contextLogGapOpenProbabilities), + contextLogGapContinuationProbabilities, + startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray); - if (DEBUG) { - System.out.println("H:"+new String(haplotypeBases)); - System.out.println("R:"+new String(readBases)); - System.out.format("L:%4.2f\n",readLikelihood); - System.out.format("StPos:%d\n", startIndexInHaplotype); - } - readEl.put(a,readLikelihood); + if (DEBUG) { + System.out.println("H:"+new String(haplotypeBases)); + System.out.println("R:"+new String(readBases)); + System.out.format("L:%4.2f\n",readLikelihood); + System.out.format("StPos:%d\n", startIndexInHaplotype); + } + + perReadAlleleLikelihoodMap.add(p, a, readLikelihood); readLikelihoods[readIdx][j++] = readLikelihood; } } - indelLikelihoodMap.put(p,readEl); } readIdx++; } diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 96704f0b8..8fc5105e5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -625,6 +625,10 @@ public class MathUtils { return maxElementIndex(array, array.length); } + public static int maxElementIndex(final byte[] array) { + return maxElementIndex(array, array.length); + } + public static int maxElementIndex(final int[] array, int endIndex) { if (array == null || array.length == 0) throw new IllegalArgumentException("Array cannot be null!"); @@ -638,6 +642,24 @@ public class MathUtils { return maxI; } + public static int maxElementIndex(final byte[] array, int endIndex) { + if (array == null || array.length == 0) + throw new IllegalArgumentException("Array cannot be null!"); + + int maxI = 0; + for (int i = 1; i < endIndex; i++) { + if (array[i] > array[maxI]) + maxI = i; + } + + return maxI; + } + + public static byte arrayMax(final byte[] array) { + return array[maxElementIndex(array)]; + } + + public static double arrayMax(final double[] array) { return array[maxElementIndex(array)]; }