1. Bug fix: check that AF=0 doesn't contain more probability than 1-fraction

2. Fix for Kiran: allow UG to call SNPs at deletion sites; we'll add an annotation to the VariantAnotator for deletions at the locus (next week).
3. Added integration tests for joint estimation model



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2038 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-13 18:02:18 +00:00
parent 1be36ca959
commit bf451873ff
4 changed files with 54 additions and 16 deletions

View File

@ -134,7 +134,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
int deletionsInPile = 0;
int deletionsInPileup = 0;
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
@ -169,7 +169,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
// check for deletions
if ( offset == -1 ) {
// are there too many deletions in the pileup?
if ( ++deletionsInPile > maxDeletionsInPileup )
if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 )
return null;
}

View File

@ -121,7 +121,6 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
// create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
GL.add(pileup, true);
GLs.put(sample, GL);
}
}
@ -458,14 +457,24 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
double totalProb = alleleFrequencyProbs[bestAFguess];
int lowIndex = bestAFguess;
int highIndex = bestAFguess;
while ( totalProb < fraction ) {
if ( lowIndex > 1 ) {
lowIndex--;
totalProb += alleleFrequencyProbs[lowIndex];
}
if ( highIndex < N ) {
highIndex++;
totalProb += alleleFrequencyProbs[highIndex];
// it's possible that AF=0 contains more probability than 1-fraction
if ( alleleFrequencyProbs[0] >= (1.0 - fraction) ) {
// in this case, the range is all possible AFs
lowIndex = 1;
highIndex = N;
}
// otherwise, find the range moving out from the best AF guess
else {
while ( totalProb < fraction ) {
if ( lowIndex > 1 ) {
lowIndex--;
totalProb += alleleFrequencyProbs[lowIndex];
}
if ( highIndex < N ) {
highIndex++;
totalProb += alleleFrequencyProbs[highIndex];
}
}
}

View File

@ -77,7 +77,7 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false)
public double CONFIDENCE_THRESHOLD = 0.0;
@Argument(fullName = "max_deletions", shortName = "deletions", doc = "Maximum reads with deletions spanning this locus for it to be callable [default:1]", required = false)
@Argument(fullName = "max_deletions", shortName = "deletions", doc = "Maximum reads with deletions spanning this locus for it to be callable [to disable, set to < 0; default:1]", required = false)
public Integer MAX_DELETIONS = 1;
@Argument(fullName = "max_coverage", shortName = "coverage", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false)

View File

@ -40,19 +40,48 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
@Test
public void testMultiSamplePilot1() {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
Arrays.asList("b7e12c4011d0043024e0dd2dd5764752"));
executeTest("testMultiSamplePilot1", spec);
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@Test
public void testMultiSamplePilot2() {
public void testMultiSamplePilot2PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
Arrays.asList("89c600d72a815c09412c97f82fa2281e"));
executeTest("testMultiSamplePilot2", spec);
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing joint estimation model
//
// --------------------------------------------------------------------------------------------------------------
@Test
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 50", 1,
Arrays.asList("a96862eb5bd3d8db143712f427e3db91"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@Test
public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 50", 1,
Arrays.asList("22735bba0cb6ea3984f1d3913e376ac4"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
}
@Test
public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 50", 1,
Arrays.asList("580248cfb813824194bda830427ab3d6"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
}
// --------------------------------------------------------------------------------------------------------------