diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java index f91e535b0..864414de9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ErrorModel.java @@ -1,6 +1,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import com.google.java.contract.Requires; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -9,6 +13,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; +import java.util.LinkedHashMap; /** * Created by IntelliJ IDEA. @@ -30,24 +35,26 @@ public class ErrorModel { private static final boolean compressRange = false; private static final double log10MinusE = Math.log10(Math.exp(1.0)); - + private static final boolean DEBUG = false; /** * Calculates the probability of the data (reference sample reads) given the phred scaled site quality score. * - * @param minQualityScore Minimum site quality score to evaluate - * @param maxQualityScore Maximum site quality score to evaluate - * @param phredScaledPrior Prior for site quality + * @param UAC Argument Collection * @param refSamplePileup Reference sample pileup * @param refSampleVC VC with True alleles in reference sample pileup - * @param minPower Minimum power */ - public ErrorModel (byte minQualityScore, byte maxQualityScore, byte phredScaledPrior, - ReadBackedPileup refSamplePileup, VariantContext refSampleVC, double minPower) { - this.maxQualityScore = maxQualityScore; - this.minQualityScore = minQualityScore; - this.phredScaledPrior = phredScaledPrior; - log10minPower = Math.log10(minPower); + public ErrorModel (final UnifiedArgumentCollection UAC, + final ReadBackedPileup refSamplePileup, + VariantContext refSampleVC, final ReferenceContext refContext) { + this.maxQualityScore = UAC.maxQualityScore; + this.minQualityScore = UAC.minQualityScore; + this.phredScaledPrior = UAC.phredScaledPrior; + log10minPower = Math.log10(UAC.minPower); + PairHMMIndelErrorModel pairModel = null; + LinkedHashMap haplotypeMap = null; + HashMap> indelLikelihoodMap = null; + double[][] perReadLikelihoods = null; double[] model = new double[maxQualityScore+1]; Arrays.fill(model,Double.NEGATIVE_INFINITY); @@ -61,11 +68,17 @@ public class ErrorModel { break; } } - - + if (refSampleVC.isIndel()) { + pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY, + UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION); + haplotypeMap = new LinkedHashMap(); + indelLikelihoodMap = new HashMap>(); + IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements + } } + + double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore)); if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) { - double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore)); for (byte q=minQualityScore; q<=maxQualityScore; q++) { // maximum uncertainty if there's no ref data at site model[q] = p; @@ -75,23 +88,47 @@ public class ErrorModel { else { hasData = true; int matches = 0; - int coverage = refSamplePileup.getNumberOfElements(); + int coverage = 0; Allele refAllele = refSampleVC.getReference(); + if (refSampleVC.isIndel()) { + final int readCounts[] = new int[refSamplePileup.getNumberOfElements()]; + //perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()]; + final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles()); + perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts); + } + int idx = 0; for (PileupElement refPileupElement : refSamplePileup) { + if (DEBUG) + System.out.println(refPileupElement.toString()); boolean isMatch = false; - for (Allele allele : refSampleVC.getAlleles()) - isMatch |= pileupElementMatches(refPileupElement, allele, refAllele); + for (Allele allele : refSampleVC.getAlleles()) { + boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase()); + if (DEBUG) System.out.println(m); + isMatch |= m; + } + if (refSampleVC.isIndel()) { + // ignore match/mismatch if reads, as determined by their likelihood, are not informative + double[] perAlleleLikelihoods = perReadLikelihoods[idx++]; + if (!isInformativeElement(perAlleleLikelihoods)) + matches++; + else + matches += (isMatch?1:0); - matches += (isMatch?1:0); - // System.out.format("MATCH:%b\n",isMatch); + } else { + matches += (isMatch?1:0); + } + coverage++; } int mismatches = coverage - matches; //System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches); for (byte q=minQualityScore; q<=maxQualityScore; q++) { - model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches); + if (coverage==0) + model[q] = p; + else + model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches); } this.refDepth = coverage; } @@ -101,6 +138,17 @@ public class ErrorModel { } + @Requires("likelihoods.length>0") + private boolean isInformativeElement(double[] likelihoods) { + // if likelihoods are the same, they're not informative + final double thresh = 0.1; + int maxIdx = MathUtils.maxElementIndex(likelihoods); + int minIdx = MathUtils.minElementIndex(likelihoods); + if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh) + return false; + else + return true; + } /** * Simple constructor that just takes a given log-probability vector as error model. * Only intended for unit testing, not general usage. @@ -115,23 +163,27 @@ public class ErrorModel { } - public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele) { - /* System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n", + public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) { + if (DEBUG) + System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n", pileupElement.getBase(), pileupElement.isBeforeDeletionStart(), pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString()); - */ + //pileupElement. // if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch if (allele.isReference()) { // for a ref allele, any base mismatch or new indel is a mismatch. - if(allele.getBases().length>0 && allele.getBases().length == refAllele.getBases().length ) // SNP/MNP case - return (/*!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart() &&*/ pileupElement.getBase() == allele.getBases()[0]); + if(allele.getBases().length>0 ) + // todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location + // could clean up/simplify this when unpadding is removed + return (pileupElement.getBase() == refBase); else // either null allele to compare, or ref/alt lengths are different (indel by definition). // if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart()); } + // for non-ref alleles to compare: if (refAllele.getBases().length == allele.getBases().length) // alleles have the same length (eg snp or mnp) return pileupElement.getBase() == allele.getBases()[0]; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java index 3e0bdd2ea..685091678 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsCalculationModel.java @@ -246,7 +246,8 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi // find the alternate allele(s) that we should be using final List alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs); - + if (alleles == null || alleles.isEmpty()) + return null; // start making the VariantContext final GenomeLoc loc = ref.getLocus(); final int endLoc = getEndLocation(tracker, ref, alleles); @@ -312,7 +313,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi refLanePileup = refPileup.getPileupForLane(laneID); //ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles); - perLaneErrorModels.put(laneID, new ErrorModel(UAC.minQualityScore, UAC.maxQualityScore, UAC.phredScaledPrior, refLanePileup, refVC, UAC.minPower)); + perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref)); } return perLaneErrorModels; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java index dae7bd43d..33b7b8b90 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoods.java @@ -25,6 +25,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { final int eventLength; double[][] readHaplotypeLikelihoods; + final byte refBase; + public PoolIndelGenotypeLikelihoods(final List alleles, final double[] logLikelihoods, final int ploidy, @@ -38,6 +40,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { this.haplotypeMap = haplotypeMap; this.refContext = referenceContext; this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles); + // todo - not needed if indel alleles have base at current position + this.refBase = referenceContext.getBase(); } // ------------------------------------------------------------------------------------- @@ -141,7 +145,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { final int numHaplotypes = haplotypeMap.size(); final int readCounts[] = new int[pileup.getNumberOfElements()]; - readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, PoolIndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts); + readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts); n = readHaplotypeLikelihoods.length; } else { Allele refAllele = null; @@ -161,7 +165,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods { int idx =0; for (Allele allele : alleles) { int cnt = numSeenBases.get(idx); - numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele)?1:0)); + numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0)); } n++; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java index c2bac4455..a15c9d7da 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PoolIndelGenotypeLikelihoodsCalculationModel.java @@ -70,12 +70,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi } - public static HashMap> getIndelLikelihoodMap() { - return IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(); - } - - - protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List alleles, final double[] logLikelihoods, final int ploidy, diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index e33c8c6f0..cf6d1cd77 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -50,20 +50,20 @@ public class BQSRIntegrationTest extends WalkerTest { String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam"; String HiSeqInterval = "chr1:10,000,000-10,100,000"; return new Object[][]{ - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "087dbc3e3f0cee6b891aecad2d0faf25")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "8a69591f728b3a2cdd79ff26bbebcc26")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "73d649bce0b69f56452de8c7e0a8686d")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "d9512cebf54ea120539059976b33d1ca")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "f61a8df03aae8c4273acf2b72497f154")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "7c2ce84e521d8f19fe5061b4e40317f7")}, - {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "66a0caad65ab41d9013e812617e67370")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "6f5e9836147b488a7a204cffa5ecd21e")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "444fdfca7835e6a3714445f7e60abcaf")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "e0bfaf38f45142d45c8fe0ae05d0d4e0")}, - {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "5b30760bab51b4d1fc02097d4eacefa4")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "742fd8edfa36ab9023ceeaac40c4e215")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "6f5e9836147b488a7a204cffa5ecd21e")}, - {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "22dd42897cf20852712c6e8f63195443")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")}, + {new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")}, + {new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")}, + {new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")}, }; } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java index 903465538..c6b1a8b7f 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolCallerIntegrationTest.java @@ -37,6 +37,13 @@ public class PoolCallerIntegrationTest extends WalkerTest { executeTest("testPoolCaller:"+name+" args=" + args, spec); } + private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) { + final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL", + REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testPoolCaller:"+name+" args=" + args, spec); + } + @Test public void testBOTH_GGA_Pools() { PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4"); @@ -44,7 +51,17 @@ public class PoolCallerIntegrationTest extends WalkerTest { @Test public void testINDEL_GGA_Pools() { - PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLINDEL","d1339990291648495bfcf4404f051478"); + PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478"); + } + + @Test + public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706"); + } + + @Test + public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() { + PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29"); } @Test diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java index a7dd65bb2..9aab24998 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/PoolGenotypeLikelihoodsUnitTest.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MathUtils; @@ -49,6 +50,13 @@ public class PoolGenotypeLikelihoodsUnitTest { private static final boolean SIMULATE_NOISY_PILEUP = false; private static final int NUM_SIMULATED_OBS = 10; + void PoolGenotypeLikelihoodsUnitTest() { + UAC.minQualityScore = 5; + UAC.maxQualityScore = 40; + UAC.phredScaledPrior = (byte)20; + UAC.minPower = 0.0; + + } @Test public void testStoringLikelihoodElements() { @@ -251,8 +259,6 @@ public class PoolGenotypeLikelihoodsUnitTest { @Test public void testErrorModel() { final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); - final byte minQ = 5; - final byte maxQ = 40; final byte refByte = refPileupTestProvider.getRefByte(); final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T'; final String refSampleName = refPileupTestProvider.getSampleNames().get(0); @@ -270,7 +276,7 @@ public class PoolGenotypeLikelihoodsUnitTest { // get artificial alignment context for ref sample - no noise Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30); final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); - final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refVC, 0.0); + final ErrorModel emodel = new ErrorModel(UAC, refPileup, refVC, refPileupTestProvider.getReferenceContext()); final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); @@ -287,8 +293,6 @@ public class PoolGenotypeLikelihoodsUnitTest { @Test public void testIndelErrorModel() { final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref"); - final byte minQ = 5; - final byte maxQ = 40; final byte refByte = refPileupTestProvider.getRefByte(); final String altBases = refByte + "TCA"; final String refSampleName = refPileupTestProvider.getSampleNames().get(0); @@ -313,7 +317,7 @@ public class PoolGenotypeLikelihoodsUnitTest { // Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30); final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); - final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refInsertionVC, 0.0); + final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext()); final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); @@ -343,7 +347,7 @@ public class PoolGenotypeLikelihoodsUnitTest { // Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches Map refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30); final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup(); - final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refDeletionVC, 0.0); + final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext()); final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector(); final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches)); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java index 274b0f8bb..d7e8e16b5 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/Datum.java @@ -43,7 +43,6 @@ public class Datum { private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero - //--------------------------------------------------------------------------------------------------------------- // // constructors @@ -84,7 +83,7 @@ public class Datum { double empiricalQualDouble() { final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT); - final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT); + final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations); return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index f49e78469..6bfe5702d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.indels; +import com.google.java.contract.Ensures; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; @@ -175,7 +176,8 @@ public class PairHMMIndelErrorModel { } - public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup, + @Ensures("result != null && result.length == pileup.getNumberOfElements()") + public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup, final LinkedHashMap haplotypeMap, final ReferenceContext ref, final int eventLength, diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java index aa4885406..a911718c1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ArtificialReadPileupTestProvider.java @@ -90,7 +90,7 @@ public class ArtificialReadPileupTestProvider { return sampleNames; } public byte getRefByte() { - return refBases.substring(offset,offset+1).getBytes()[0]; + return referenceContext.getBase(); } public ReferenceContext getReferenceContext() { return referenceContext;}