1. Bug fix: check that AF=0 doesn't contain more probability than 1-fraction
2. Fix for Kiran: allow UG to call SNPs at deletion sites; we'll add an annotation to the VariantAnotator for deletions at the locus (next week). 3. Added integration tests for joint estimation model git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2038 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1be36ca959
commit
bf451873ff
|
|
@ -134,7 +134,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
|
|
||||||
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
|
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
|
||||||
|
|
||||||
int deletionsInPile = 0;
|
int deletionsInPileup = 0;
|
||||||
List<SAMRecord> reads = context.getReads();
|
List<SAMRecord> reads = context.getReads();
|
||||||
List<Integer> offsets = context.getOffsets();
|
List<Integer> offsets = context.getOffsets();
|
||||||
|
|
||||||
|
|
@ -169,7 +169,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
// check for deletions
|
// check for deletions
|
||||||
if ( offset == -1 ) {
|
if ( offset == -1 ) {
|
||||||
// are there too many deletions in the pileup?
|
// are there too many deletions in the pileup?
|
||||||
if ( ++deletionsInPile > maxDeletionsInPileup )
|
if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 )
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,6 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||||
GL.add(pileup, true);
|
GL.add(pileup, true);
|
||||||
|
|
||||||
GLs.put(sample, GL);
|
GLs.put(sample, GL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -458,6 +457,15 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
||||||
double totalProb = alleleFrequencyProbs[bestAFguess];
|
double totalProb = alleleFrequencyProbs[bestAFguess];
|
||||||
int lowIndex = bestAFguess;
|
int lowIndex = bestAFguess;
|
||||||
int highIndex = bestAFguess;
|
int highIndex = bestAFguess;
|
||||||
|
|
||||||
|
// it's possible that AF=0 contains more probability than 1-fraction
|
||||||
|
if ( alleleFrequencyProbs[0] >= (1.0 - fraction) ) {
|
||||||
|
// in this case, the range is all possible AFs
|
||||||
|
lowIndex = 1;
|
||||||
|
highIndex = N;
|
||||||
|
}
|
||||||
|
// otherwise, find the range moving out from the best AF guess
|
||||||
|
else {
|
||||||
while ( totalProb < fraction ) {
|
while ( totalProb < fraction ) {
|
||||||
if ( lowIndex > 1 ) {
|
if ( lowIndex > 1 ) {
|
||||||
lowIndex--;
|
lowIndex--;
|
||||||
|
|
@ -468,6 +476,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
||||||
totalProb += alleleFrequencyProbs[highIndex];
|
totalProb += alleleFrequencyProbs[highIndex];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return new AlleleFrequencyBacked.AlleleFrequencyRange((double)lowIndex / (double)N, (double)highIndex / (double)N, fraction);
|
return new AlleleFrequencyBacked.AlleleFrequencyRange((double)lowIndex / (double)N, (double)highIndex / (double)N, fraction);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -77,7 +77,7 @@ public class UnifiedArgumentCollection {
|
||||||
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false)
|
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false)
|
||||||
public double CONFIDENCE_THRESHOLD = 0.0;
|
public double CONFIDENCE_THRESHOLD = 0.0;
|
||||||
|
|
||||||
@Argument(fullName = "max_deletions", shortName = "deletions", doc = "Maximum reads with deletions spanning this locus for it to be callable [default:1]", required = false)
|
@Argument(fullName = "max_deletions", shortName = "deletions", doc = "Maximum reads with deletions spanning this locus for it to be callable [to disable, set to < 0; default:1]", required = false)
|
||||||
public Integer MAX_DELETIONS = 1;
|
public Integer MAX_DELETIONS = 1;
|
||||||
|
|
||||||
@Argument(fullName = "max_coverage", shortName = "coverage", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false)
|
@Argument(fullName = "max_coverage", shortName = "coverage", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false)
|
||||||
|
|
|
||||||
|
|
@ -40,19 +40,48 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
@Test
|
@Test
|
||||||
public void testMultiSamplePilot1() {
|
public void testMultiSamplePilot1PointEM() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
||||||
Arrays.asList("b7e12c4011d0043024e0dd2dd5764752"));
|
Arrays.asList("b7e12c4011d0043024e0dd2dd5764752"));
|
||||||
executeTest("testMultiSamplePilot1", spec);
|
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMultiSamplePilot2() {
|
public void testMultiSamplePilot2PointEM() {
|
||||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 50", 1,
|
||||||
Arrays.asList("89c600d72a815c09412c97f82fa2281e"));
|
Arrays.asList("89c600d72a815c09412c97f82fa2281e"));
|
||||||
executeTest("testMultiSamplePilot2", spec);
|
executeTest("testMultiSamplePilot2 - Point Estimate EM", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// testing joint estimation model
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
@Test
|
||||||
|
public void testMultiSamplePilot1Joint() {
|
||||||
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm JOINT_ESTIMATE -confidence 50", 1,
|
||||||
|
Arrays.asList("a96862eb5bd3d8db143712f427e3db91"));
|
||||||
|
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMultiSamplePilot2Joint() {
|
||||||
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm JOINT_ESTIMATE -confidence 50", 1,
|
||||||
|
Arrays.asList("22735bba0cb6ea3984f1d3913e376ac4"));
|
||||||
|
executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSingleSamplePilot2Joint() {
|
||||||
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,067,000-10,083,000 -bm empirical -gm JOINT_ESTIMATE -confidence 50", 1,
|
||||||
|
Arrays.asList("580248cfb813824194bda830427ab3d6"));
|
||||||
|
executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue