Resolving merge conflicts
This commit is contained in:
commit
beb7610195
|
|
@ -1,6 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -9,6 +13,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -30,24 +35,26 @@ public class ErrorModel {
|
|||
private static final boolean compressRange = false;
|
||||
|
||||
private static final double log10MinusE = Math.log10(Math.exp(1.0));
|
||||
|
||||
private static final boolean DEBUG = false;
|
||||
/**
|
||||
* Calculates the probability of the data (reference sample reads) given the phred scaled site quality score.
|
||||
*
|
||||
* @param minQualityScore Minimum site quality score to evaluate
|
||||
* @param maxQualityScore Maximum site quality score to evaluate
|
||||
* @param phredScaledPrior Prior for site quality
|
||||
* @param UAC Argument Collection
|
||||
* @param refSamplePileup Reference sample pileup
|
||||
* @param refSampleVC VC with True alleles in reference sample pileup
|
||||
* @param minPower Minimum power
|
||||
*/
|
||||
public ErrorModel (byte minQualityScore, byte maxQualityScore, byte phredScaledPrior,
|
||||
ReadBackedPileup refSamplePileup, VariantContext refSampleVC, double minPower) {
|
||||
this.maxQualityScore = maxQualityScore;
|
||||
this.minQualityScore = minQualityScore;
|
||||
this.phredScaledPrior = phredScaledPrior;
|
||||
log10minPower = Math.log10(minPower);
|
||||
public ErrorModel (final UnifiedArgumentCollection UAC,
|
||||
final ReadBackedPileup refSamplePileup,
|
||||
VariantContext refSampleVC, final ReferenceContext refContext) {
|
||||
this.maxQualityScore = UAC.maxQualityScore;
|
||||
this.minQualityScore = UAC.minQualityScore;
|
||||
this.phredScaledPrior = UAC.phredScaledPrior;
|
||||
log10minPower = Math.log10(UAC.minPower);
|
||||
|
||||
PairHMMIndelErrorModel pairModel = null;
|
||||
LinkedHashMap<Allele, Haplotype> haplotypeMap = null;
|
||||
HashMap<PileupElement, LinkedHashMap<Allele, Double>> indelLikelihoodMap = null;
|
||||
double[][] perReadLikelihoods = null;
|
||||
|
||||
double[] model = new double[maxQualityScore+1];
|
||||
Arrays.fill(model,Double.NEGATIVE_INFINITY);
|
||||
|
|
@ -61,11 +68,17 @@ public class ErrorModel {
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (refSampleVC.isIndel()) {
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
||||
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
|
||||
indelLikelihoodMap = new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
|
||||
IndelGenotypeLikelihoodsCalculationModel.getHaplotypeMapFromAlleles(refSampleVC.getAlleles(), refContext, refContext.getLocus(), haplotypeMap); // will update haplotypeMap adding elements
|
||||
}
|
||||
}
|
||||
|
||||
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
|
||||
if (refSamplePileup == null || refSampleVC == null || !hasCalledAlleles) {
|
||||
double p = MathUtils.phredScaleToLog10Probability((byte)(maxQualityScore-minQualityScore));
|
||||
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
||||
// maximum uncertainty if there's no ref data at site
|
||||
model[q] = p;
|
||||
|
|
@ -75,23 +88,47 @@ public class ErrorModel {
|
|||
else {
|
||||
hasData = true;
|
||||
int matches = 0;
|
||||
int coverage = refSamplePileup.getNumberOfElements();
|
||||
int coverage = 0;
|
||||
|
||||
Allele refAllele = refSampleVC.getReference();
|
||||
|
||||
if (refSampleVC.isIndel()) {
|
||||
final int readCounts[] = new int[refSamplePileup.getNumberOfElements()];
|
||||
//perReadLikelihoods = new double[readCounts.length][refSampleVC.getAlleles().size()];
|
||||
final int eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(refSampleVC.getAlleles());
|
||||
perReadLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(refSamplePileup,haplotypeMap,refContext, eventLength, indelLikelihoodMap, readCounts);
|
||||
}
|
||||
int idx = 0;
|
||||
for (PileupElement refPileupElement : refSamplePileup) {
|
||||
if (DEBUG)
|
||||
System.out.println(refPileupElement.toString());
|
||||
boolean isMatch = false;
|
||||
for (Allele allele : refSampleVC.getAlleles())
|
||||
isMatch |= pileupElementMatches(refPileupElement, allele, refAllele);
|
||||
for (Allele allele : refSampleVC.getAlleles()) {
|
||||
boolean m = pileupElementMatches(refPileupElement, allele, refAllele, refContext.getBase());
|
||||
if (DEBUG) System.out.println(m);
|
||||
isMatch |= m;
|
||||
}
|
||||
if (refSampleVC.isIndel()) {
|
||||
// ignore match/mismatch if reads, as determined by their likelihood, are not informative
|
||||
double[] perAlleleLikelihoods = perReadLikelihoods[idx++];
|
||||
if (!isInformativeElement(perAlleleLikelihoods))
|
||||
matches++;
|
||||
else
|
||||
matches += (isMatch?1:0);
|
||||
|
||||
matches += (isMatch?1:0);
|
||||
// System.out.format("MATCH:%b\n",isMatch);
|
||||
} else {
|
||||
matches += (isMatch?1:0);
|
||||
}
|
||||
coverage++;
|
||||
}
|
||||
|
||||
int mismatches = coverage - matches;
|
||||
//System.out.format("Cov:%d match:%d mismatch:%d\n",coverage, matches, mismatches);
|
||||
for (byte q=minQualityScore; q<=maxQualityScore; q++) {
|
||||
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
|
||||
if (coverage==0)
|
||||
model[q] = p;
|
||||
else
|
||||
model[q] = log10PoissonProbabilitySiteGivenQual(q,coverage, mismatches);
|
||||
}
|
||||
this.refDepth = coverage;
|
||||
}
|
||||
|
|
@ -101,6 +138,17 @@ public class ErrorModel {
|
|||
}
|
||||
|
||||
|
||||
@Requires("likelihoods.length>0")
|
||||
private boolean isInformativeElement(double[] likelihoods) {
|
||||
// if likelihoods are the same, they're not informative
|
||||
final double thresh = 0.1;
|
||||
int maxIdx = MathUtils.maxElementIndex(likelihoods);
|
||||
int minIdx = MathUtils.minElementIndex(likelihoods);
|
||||
if (likelihoods[maxIdx]-likelihoods[minIdx]< thresh)
|
||||
return false;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
/**
|
||||
* Simple constructor that just takes a given log-probability vector as error model.
|
||||
* Only intended for unit testing, not general usage.
|
||||
|
|
@ -115,23 +163,27 @@ public class ErrorModel {
|
|||
|
||||
}
|
||||
|
||||
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele) {
|
||||
/* System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
|
||||
public static boolean pileupElementMatches(PileupElement pileupElement, Allele allele, Allele refAllele, byte refBase) {
|
||||
if (DEBUG)
|
||||
System.out.format("PE: base:%s isNextToDel:%b isNextToIns:%b eventBases:%s eventLength:%d Allele:%s RefAllele:%s\n",
|
||||
pileupElement.getBase(), pileupElement.isBeforeDeletionStart(),
|
||||
pileupElement.isBeforeInsertion(),pileupElement.getEventBases(),pileupElement.getEventLength(), allele.toString(), refAllele.toString());
|
||||
*/
|
||||
|
||||
//pileupElement.
|
||||
// if test allele is ref, any base mismatch, or any insertion/deletion at start of pileup count as mismatch
|
||||
if (allele.isReference()) {
|
||||
// for a ref allele, any base mismatch or new indel is a mismatch.
|
||||
if(allele.getBases().length>0 && allele.getBases().length == refAllele.getBases().length ) // SNP/MNP case
|
||||
return (/*!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart() &&*/ pileupElement.getBase() == allele.getBases()[0]);
|
||||
if(allele.getBases().length>0 )
|
||||
// todo - can't check vs. allele because allele is not padded so it doesn't include the reference base at this location
|
||||
// could clean up/simplify this when unpadding is removed
|
||||
return (pileupElement.getBase() == refBase);
|
||||
else
|
||||
// either null allele to compare, or ref/alt lengths are different (indel by definition).
|
||||
// if we have an indel that we are comparing against a REF allele, any indel presence (of any length/content) is a mismatch
|
||||
return (!pileupElement.isBeforeInsertion() && !pileupElement.isBeforeDeletionStart());
|
||||
}
|
||||
|
||||
// for non-ref alleles to compare:
|
||||
if (refAllele.getBases().length == allele.getBases().length)
|
||||
// alleles have the same length (eg snp or mnp)
|
||||
return pileupElement.getBase() == allele.getBases()[0];
|
||||
|
|
|
|||
|
|
@ -246,7 +246,8 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
|
||||
// find the alternate allele(s) that we should be using
|
||||
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
|
||||
|
||||
if (alleles == null || alleles.isEmpty())
|
||||
return null;
|
||||
// start making the VariantContext
|
||||
final GenomeLoc loc = ref.getLocus();
|
||||
final int endLoc = getEndLocation(tracker, ref, alleles);
|
||||
|
|
@ -312,7 +313,7 @@ public abstract class PoolGenotypeLikelihoodsCalculationModel extends GenotypeLi
|
|||
refLanePileup = refPileup.getPileupForLane(laneID);
|
||||
|
||||
//ReferenceSample referenceSample = new ReferenceSample(UAC.referenceSampleName, refLanePileup, trueReferenceAlleles);
|
||||
perLaneErrorModels.put(laneID, new ErrorModel(UAC.minQualityScore, UAC.maxQualityScore, UAC.phredScaledPrior, refLanePileup, refVC, UAC.minPower));
|
||||
perLaneErrorModels.put(laneID, new ErrorModel(UAC, refLanePileup, refVC, ref));
|
||||
}
|
||||
return perLaneErrorModels;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
|||
final int eventLength;
|
||||
double[][] readHaplotypeLikelihoods;
|
||||
|
||||
final byte refBase;
|
||||
|
||||
public PoolIndelGenotypeLikelihoods(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
|
|
@ -38,6 +40,8 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
|||
this.haplotypeMap = haplotypeMap;
|
||||
this.refContext = referenceContext;
|
||||
this.eventLength = IndelGenotypeLikelihoodsCalculationModel.getEventLength(alleles);
|
||||
// todo - not needed if indel alleles have base at current position
|
||||
this.refBase = referenceContext.getBase();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
|
@ -141,7 +145,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
|||
final int numHaplotypes = haplotypeMap.size();
|
||||
|
||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, PoolIndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
|
||||
readHaplotypeLikelihoods = pairModel.computeGeneralReadHaplotypeLikelihoods(pileup, haplotypeMap, refContext, eventLength, IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap(), readCounts);
|
||||
n = readHaplotypeLikelihoods.length;
|
||||
} else {
|
||||
Allele refAllele = null;
|
||||
|
|
@ -161,7 +165,7 @@ public class PoolIndelGenotypeLikelihoods extends PoolGenotypeLikelihoods {
|
|||
int idx =0;
|
||||
for (Allele allele : alleles) {
|
||||
int cnt = numSeenBases.get(idx);
|
||||
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele)?1:0));
|
||||
numSeenBases.set(idx++,cnt + (ErrorModel.pileupElementMatches(elt, allele, refAllele, refBase)?1:0));
|
||||
}
|
||||
|
||||
n++;
|
||||
|
|
|
|||
|
|
@ -70,12 +70,6 @@ public class PoolIndelGenotypeLikelihoodsCalculationModel extends PoolGenotypeLi
|
|||
}
|
||||
|
||||
|
||||
public static HashMap<PileupElement, LinkedHashMap<Allele, Double>> getIndelLikelihoodMap() {
|
||||
return IndelGenotypeLikelihoodsCalculationModel.getIndelLikelihoodMap();
|
||||
}
|
||||
|
||||
|
||||
|
||||
protected PoolGenotypeLikelihoods getPoolGenotypeLikelihoodObject(final List<Allele> alleles,
|
||||
final double[] logLikelihoods,
|
||||
final int ploidy,
|
||||
|
|
|
|||
|
|
@ -50,20 +50,20 @@ public class BQSRIntegrationTest extends WalkerTest {
|
|||
String HiSeqBam = privateTestDir + "HiSeq.1mb.1RG.bam";
|
||||
String HiSeqInterval = "chr1:10,000,000-10,100,000";
|
||||
return new Object[][]{
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "087dbc3e3f0cee6b891aecad2d0faf25")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "8a69591f728b3a2cdd79ff26bbebcc26")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "73d649bce0b69f56452de8c7e0a8686d")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "d9512cebf54ea120539059976b33d1ca")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "f61a8df03aae8c4273acf2b72497f154")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "7c2ce84e521d8f19fe5061b4e40317f7")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "66a0caad65ab41d9013e812617e67370")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "6f5e9836147b488a7a204cffa5ecd21e")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "444fdfca7835e6a3714445f7e60abcaf")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "e0bfaf38f45142d45c8fe0ae05d0d4e0")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "5b30760bab51b4d1fc02097d4eacefa4")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "742fd8edfa36ab9023ceeaac40c4e215")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "6f5e9836147b488a7a204cffa5ecd21e")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "22dd42897cf20852712c6e8f63195443")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, "", "239ce3387b4540faf44ec000d844ccd1")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov ContextCovariate", "d69127341938910c38166dd18449598d")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --no_standard_covs -cov CycleCovariate", "b77e621bed1b0dc57970399a35efd0da")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --indels_context_size 4", "2697f38d467a7856c40abce0f778456a")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --low_quality_tail 5", "a55018b1643ca3964dbb50783db9f3e4")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --quantizing_levels 6", "54fe8d1f5573845e6a2aa9688f6dd950")},
|
||||
{new BQSRTest(hg18Reference, HiSeqBam, HiSeqInterval, " --mismatches_context_size 4", "6b518ad3c56d66c6f5ea812d058f5c4d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", "", "3ddb9730f00ee3a612b42209ed9f7e03")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-10,200,000", "", "4cd4fb754e1ef142ad691cb35c74dc4c")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.1RG.bam", "1:10,000,000-10,200,000", "", "364eab693e5e4c7d18a77726b6460f3f")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "originalQuals.1kg.chr1.1-1K.1RG.bam", "1:1-1,000", " -OQ", "c449cfca61d605b534f0dce35581339d")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "1:10,000,000-20,000,000", " --solid_recal_mode REMOVE_REF_BIAS", "5268cb5a4b69335568751d5e5ab80d43")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:anyNameABCD,VCF " + privateTestDir + "vcfexample3.vcf", "3ddb9730f00ee3a612b42209ed9f7e03")},
|
||||
{new BQSRTest(b36KGReference, validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.1Mb.1RG.bam", "1:10,000,000-10,200,000", " -knownSites:bed " + validationDataLocation + "bqsrKnownTest.bed", "4a786ba42e38e7fd101947c34a6883ed")},
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,13 @@ public class PoolCallerIntegrationTest extends WalkerTest {
|
|||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
private void PC_LSV_Test_NoRef(String args, String name, String model, String md5) {
|
||||
final String base = String.format("-T UnifiedGenotyper -R %s -I %s -L %s -glm %s -ignoreLane -pnrm POOL",
|
||||
REF, LSV_BAM, LSVINTERVALS, model) + " --no_cmdline_in_header -o %s";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testPoolCaller:"+name+" args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBOTH_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLBOTH","36b8db57f65be1cc3d2d9d7f9f3f26e4");
|
||||
|
|
@ -44,7 +51,17 @@ public class PoolCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testINDEL_GGA_Pools() {
|
||||
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_BOTH_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
|
||||
PC_LSV_Test(String.format(" -maxAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles %s",LSV_ALLELES),"LSV_INDEL_GGA","POOLINDEL","d1339990291648495bfcf4404f051478");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testINDEL_maxAlleles2_ploidy3_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","POOLINDEL","b66e7150603310fd57ee7bf9fc590706");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testINDEL_maxAlleles2_ploidy1_Pools_noRef() {
|
||||
PC_LSV_Test_NoRef(" -maxAlleles 2 -ploidy 1","LSV_INDEL_DISC_NOREF_p1","POOLINDEL","ccdae3fc4d2c922f956a186aaad51c29");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import net.sf.samtools.SAMUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
@ -49,6 +50,13 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
private static final boolean SIMULATE_NOISY_PILEUP = false;
|
||||
private static final int NUM_SIMULATED_OBS = 10;
|
||||
|
||||
void PoolGenotypeLikelihoodsUnitTest() {
|
||||
UAC.minQualityScore = 5;
|
||||
UAC.maxQualityScore = 40;
|
||||
UAC.phredScaledPrior = (byte)20;
|
||||
UAC.minPower = 0.0;
|
||||
|
||||
}
|
||||
@Test
|
||||
public void testStoringLikelihoodElements() {
|
||||
|
||||
|
|
@ -251,8 +259,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
@Test
|
||||
public void testErrorModel() {
|
||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||
final byte minQ = 5;
|
||||
final byte maxQ = 40;
|
||||
final byte refByte = refPileupTestProvider.getRefByte();
|
||||
final byte altByte = refByte == (byte)'T'? (byte) 'C': (byte)'T';
|
||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||
|
|
@ -270,7 +276,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// get artificial alignment context for ref sample - no noise
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(0, new String(new byte[]{altByte}), new int[]{matches, mismatches}, false, 30);
|
||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refVC, 0.0);
|
||||
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refVC, refPileupTestProvider.getReferenceContext());
|
||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||
|
||||
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
||||
|
|
@ -287,8 +293,6 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
@Test
|
||||
public void testIndelErrorModel() {
|
||||
final ArtificialReadPileupTestProvider refPileupTestProvider = new ArtificialReadPileupTestProvider(1,"ref");
|
||||
final byte minQ = 5;
|
||||
final byte maxQ = 40;
|
||||
final byte refByte = refPileupTestProvider.getRefByte();
|
||||
final String altBases = refByte + "TCA";
|
||||
final String refSampleName = refPileupTestProvider.getSampleNames().get(0);
|
||||
|
|
@ -313,7 +317,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// Ref sample has TC insertion but pileup will have TCA inserted instead to test mismatches
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(altBases.length(), altBases, new int[]{matches, mismatches}, false, 30);
|
||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refInsertionVC, 0.0);
|
||||
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refInsertionVC, refPileupTestProvider.getReferenceContext());
|
||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||
|
||||
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
||||
|
|
@ -343,7 +347,7 @@ public class PoolGenotypeLikelihoodsUnitTest {
|
|||
// Ref sample has 4bp deletion but pileup will have 3 bp deletion instead to test mismatches
|
||||
Map<String,AlignmentContext> refContext = refPileupTestProvider.getAlignmentContextFromAlleles(-delLength+1, altBases, new int[]{matches, mismatches}, false, 30);
|
||||
final ReadBackedPileup refPileup = refContext.get(refSampleName).getBasePileup();
|
||||
final ErrorModel emodel = new ErrorModel(minQ,maxQ, (byte)20, refPileup, refDeletionVC, 0.0);
|
||||
final ErrorModel emodel = new ErrorModel(UAC, refPileup, refDeletionVC, refPileupTestProvider.getReferenceContext());
|
||||
final double[] errorVec = emodel.getErrorModelVector().getProbabilityVector();
|
||||
|
||||
final double mlEst = -10.0*Math.log10((double)mismatches/(double)(matches+mismatches));
|
||||
|
|
|
|||
|
|
@ -43,7 +43,6 @@ public class Datum {
|
|||
|
||||
private static final int SMOOTHING_CONSTANT = 1; // used when calculating empirical qualities to avoid division by zero
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
|
|
@ -84,7 +83,7 @@ public class Datum {
|
|||
|
||||
double empiricalQualDouble() {
|
||||
final double doubleMismatches = (double) (numMismatches + SMOOTHING_CONSTANT);
|
||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT);
|
||||
final double doubleObservations = (double) (numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT); // smoothing is one error and one non-error observation, for example
|
||||
final double empiricalQual = -10 * Math.log10(doubleMismatches / doubleObservations);
|
||||
return Math.min(empiricalQual, (double) QualityUtils.MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
@ -175,7 +176,8 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
}
|
||||
|
||||
public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup,
|
||||
@Ensures("result != null && result.length == pileup.getNumberOfElements()")
|
||||
public synchronized double[][] computeGeneralReadHaplotypeLikelihoods(final ReadBackedPileup pileup,
|
||||
final LinkedHashMap<Allele, Haplotype> haplotypeMap,
|
||||
final ReferenceContext ref,
|
||||
final int eventLength,
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ public class ArtificialReadPileupTestProvider {
|
|||
return sampleNames;
|
||||
}
|
||||
public byte getRefByte() {
|
||||
return refBases.substring(offset,offset+1).getBytes()[0];
|
||||
return referenceContext.getBase();
|
||||
}
|
||||
|
||||
public ReferenceContext getReferenceContext() { return referenceContext;}
|
||||
|
|
|
|||
Loading…
Reference in New Issue