From e1e5b35b19a6a28879369c9f818a9964f78f4cc7 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 25 Nov 2009 04:54:44 +0000 Subject: [PATCH] Don't have the spanning deletions argument be a hard cutoff, but instead be a percentage of the reads in the pileup. Default is now 5% of reads. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2155 348d0f76-0448-11de-a6fe-93d51630548a --- .../genotyper/GenotypeCalculationModel.java | 20 ++++++++++++------- .../genotyper/PooledCalculationModel.java | 12 ++++++----- .../genotyper/UnifiedArgumentCollection.java | 4 ++-- .../UnifiedGenotyperIntegrationTest.java | 10 +++++----- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 36c1f3298..9bf1f8595 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -39,7 +39,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { protected double CONFIDENCE_THRESHOLD; protected double MINIMUM_ALLELE_FREQUENCY; protected boolean REPORT_SLOD; - protected int maxDeletionsInPileup; + protected double maxDeletionFractionInPileup; protected String assumedSingleSample; protected PrintWriter verboseWriter; @@ -70,7 +70,10 @@ public abstract class GenotypeCalculationModel implements Cloneable { POOL_SIZE = UAC.POOLSIZE; CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD; MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY; - maxDeletionsInPileup = UAC.MAX_DELETIONS; + maxDeletionFractionInPileup = UAC.MAX_DELETION_FRACTION; + // disable if that's what is requested + if ( maxDeletionFractionInPileup < 0.0 || maxDeletionFractionInPileup > 1.0 ) + maxDeletionFractionInPileup = -1.0; assumedSingleSample = UAC.ASSUME_SINGLE_SAMPLE; if ( UAC.VERBOSE != null ) { try { @@ -163,17 +166,20 @@ public abstract class GenotypeCalculationModel implements Cloneable { } // check for deletions - if ( offset == -1 ) { - // are there too many deletions in the pileup? - if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 ) - return null; - } + if ( offset == -1 ) + deletionsInPileup++; // add the read to this sample's context // note that bad bases are added to the context (for DoC calculations later) myContext.add(read, offset); } + + // are there too many deletions in the pileup? + if ( maxDeletionFractionInPileup != -1.0 && + (double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup ) + return null; + return contexts; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java index 93dbc1478..3cd452fe1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PooledCalculationModel.java @@ -56,17 +56,19 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode for (int i = 0; i < reads.size(); i++) { // check for deletions int offset = offsets.get(i); - if ( offset == -1 ) { - // are there too many deletions in the pileup? - if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 ) - return null; - } + if ( offset == -1 ) + deletionsInPileup++; // add the read to this sample's context // note that bad bases are added to the context (for DoC calculations later) pooledContext.add(reads.get(i), offset); } + // are there too many deletions in the pileup? + if ( maxDeletionFractionInPileup != -1.0 && + (double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup ) + return null; + HashMap contexts = new HashMap(); contexts.put(POOL_SAMPLE_NAME, pooledContext); return contexts; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 7b399d953..873de5161 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -76,8 +76,8 @@ public class UnifiedArgumentCollection { @Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false) public double CONFIDENCE_THRESHOLD = 0.0; - @Argument(fullName = "max_deletions", shortName = "deletions", doc = "Maximum reads with deletions spanning this locus for it to be callable [to disable, set to < 0; default:1]", required = false) - public Integer MAX_DELETIONS = 1; + @Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false) + public Double MAX_DELETION_FRACTION = 0.05; @Argument(fullName = "max_coverage", shortName = "mc", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false) public Integer MAX_READS_IN_PILEUP = 10000; diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 278c478e7..226659971 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("90072e7f935357695f8f48c40ff6e7a4")); + Arrays.asList("b14783b877e4857366cf9b1f516de343")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("cdbda4a455f11f75fa666e9317139f22")); + Arrays.asList("48f17694db2fc8744741e4eb73227e5b")); executeTest("testPooled1", spec); } @@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("415f4a276005cad7fea0a8fcc3b8c3b5")); + Arrays.asList("e1467f309e407a037404535b07b7e969")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -125,7 +125,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -bm empirical" + " -vf GELI", 1, - Arrays.asList("f490a407c9c62d43eb83107e83c858f6")); + Arrays.asList("46acd5a5d7301129ee3c0c9a4ab10259")); executeTest(String.format("testMultiTechnologies"), spec); } @@ -162,7 +162,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void genotypeTest() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm empirical --genotype", 1, - Arrays.asList("45da29e3b1306d546a7b80c30c979ad4")); + Arrays.asList("db34a348abd13ee894780296e8a7e909")); executeTest("genotypeTest", spec); }