Change the spanning-deletions argument from a hard read-count cutoff to a fraction of the reads in the pileup. The default is now 0.05 (5% of reads).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2155 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-25 04:54:44 +00:00
parent 03342c1fdd
commit e1e5b35b19
4 changed files with 27 additions and 19 deletions

View File

@ -39,7 +39,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected double CONFIDENCE_THRESHOLD;
protected double MINIMUM_ALLELE_FREQUENCY;
protected boolean REPORT_SLOD;
protected int maxDeletionsInPileup;
protected double maxDeletionFractionInPileup;
protected String assumedSingleSample;
protected PrintWriter verboseWriter;
@ -70,7 +70,10 @@ public abstract class GenotypeCalculationModel implements Cloneable {
POOL_SIZE = UAC.POOLSIZE;
CONFIDENCE_THRESHOLD = UAC.CONFIDENCE_THRESHOLD;
MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
maxDeletionsInPileup = UAC.MAX_DELETIONS;
maxDeletionFractionInPileup = UAC.MAX_DELETION_FRACTION;
// disable if that's what is requested
if ( maxDeletionFractionInPileup < 0.0 || maxDeletionFractionInPileup > 1.0 )
maxDeletionFractionInPileup = -1.0;
assumedSingleSample = UAC.ASSUME_SINGLE_SAMPLE;
if ( UAC.VERBOSE != null ) {
try {
@ -163,17 +166,20 @@ public abstract class GenotypeCalculationModel implements Cloneable {
}
// check for deletions
if ( offset == -1 ) {
// are there too many deletions in the pileup?
if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 )
return null;
}
if ( offset == -1 )
deletionsInPileup++;
// add the read to this sample's context
// note that bad bases are added to the context (for DoC calculations later)
myContext.add(read, offset);
}
// are there too many deletions in the pileup?
if ( maxDeletionFractionInPileup != -1.0 &&
(double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup )
return null;
return contexts;
}

View File

@ -56,17 +56,19 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
for (int i = 0; i < reads.size(); i++) {
// check for deletions
int offset = offsets.get(i);
if ( offset == -1 ) {
// are there too many deletions in the pileup?
if ( ++deletionsInPileup > maxDeletionsInPileup && maxDeletionsInPileup >= 0 )
return null;
}
if ( offset == -1 )
deletionsInPileup++;
// add the read to this sample's context
// note that bad bases are added to the context (for DoC calculations later)
pooledContext.add(reads.get(i), offset);
}
// are there too many deletions in the pileup?
if ( maxDeletionFractionInPileup != -1.0 &&
(double)deletionsInPileup / (double)reads.size() > maxDeletionFractionInPileup )
return null;
HashMap<String, AlignmentContextBySample> contexts = new HashMap<String, AlignmentContextBySample>();
contexts.put(POOL_SAMPLE_NAME, pooledContext);
return contexts;

View File

@ -76,8 +76,8 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false)
public double CONFIDENCE_THRESHOLD = 0.0;
@Argument(fullName = "max_deletions", shortName = "deletions", doc = "Maximum reads with deletions spanning this locus for it to be callable [to disable, set to < 0; default:1]", required = false)
public Integer MAX_DELETIONS = 1;
@Argument(fullName = "max_deletion_fraction", shortName = "deletions", doc = "Maximum fraction of reads with deletions spanning this locus for it to be callable [to disable, set to < 0 or > 1; default:0.05]", required = false)
public Double MAX_DELETION_FRACTION = 0.05;
@Argument(fullName = "max_coverage", shortName = "mc", doc = "Maximum reads at this locus for it to be callable; to disable, provide value < 1 [default:10,000]", required = false)
public Integer MAX_READS_IN_PILEUP = 10000;

View File

@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1PointEM() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1,
Arrays.asList("90072e7f935357695f8f48c40ff6e7a4"));
Arrays.asList("b14783b877e4857366cf9b1f516de343"));
executeTest("testMultiSamplePilot1 - Point Estimate EM", spec);
}
@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testPooled1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
Arrays.asList("cdbda4a455f11f75fa666e9317139f22"));
Arrays.asList("48f17694db2fc8744741e4eb73227e5b"));
executeTest("testPooled1", spec);
}
@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1,
Arrays.asList("415f4a276005cad7fea0a8fcc3b8c3b5"));
Arrays.asList("e1467f309e407a037404535b07b7e969"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
}
@ -125,7 +125,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -bm empirical" +
" -vf GELI",
1,
Arrays.asList("f490a407c9c62d43eb83107e83c858f6"));
Arrays.asList("46acd5a5d7301129ee3c0c9a4ab10259"));
executeTest(String.format("testMultiTechnologies"), spec);
}
@ -162,7 +162,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void genotypeTest() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
testGeliLod5() + " -L 1:10,000,000-10,100,000 -bm empirical --genotype", 1,
Arrays.asList("45da29e3b1306d546a7b80c30c979ad4"));
Arrays.asList("db34a348abd13ee894780296e8a7e909"));
executeTest("genotypeTest", spec);
}