[Foiled yet again when trying to do this in git] Slight modifications in the argument structure for the IndelRealigner. Instead of boolean flags -knownsOnly and -doNotUseSW, we now have an enum --consensusDeterminationModel which lets you specify knowns only, also use indels in reads, or also use SW. Please note that the default behavior of IR has not changed at all (and won't for a few more days) - that'll be done in GIT (fingers crossed).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6008 348d0f76-0448-11de-a6fe-93d51630548a
2011-06-16 17:35:37 +00:00 · 2011-06-16 17:35:37 +00:00 · 4e85416af1
parent 4304fc4862
commit 4e85416af1
3 changed files with 29 additions and 12 deletions
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
@ -76,6 +76,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
    public static final String ORIGINAL_POSITION_TAG = "OP";
    public static final String PROGRAM_RECORD_NAME = "GATK IndelRealigner";

+    public enum ConsensusDeterminationModel {
+        KNOWNS_ONLY,
+        USE_READS,
+        USE_SW
+    }
+
    @Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true)
    protected String intervalsFile = null;

@ -89,12 +95,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
    protected StingSAMFileWriter writer = null;
    protected ConstrainedMateFixingManager manager = null;

-    @Argument(fullName="useOnlyKnownIndels", shortName="knownsOnly", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs for constructing the alternate references.")
-    protected boolean USE_KNOWN_INDELS_ONLY = false;
-
-    @Hidden
-    @Argument(fullName="doNotUseSW", shortName="doNotUseSW", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs or indels in the reads for constructing the alternate references.")
-    protected boolean NO_SW = false;
+    @Argument(fullName = "consensusDeterminationModel", shortName = "model", doc = "How should we determine the possible alternate consenses? -- in the order of least permissive to most permissive there is KNOWNS_ONLY (use only indels from known indels provided in RODs), USE_READS (additionally use indels already present in the original alignments of the reads), and USE_SW (additionally use 'Smith-Waterman' to generate alternate consenses).  The default is USE_SW", required = false)
+    public ConsensusDeterminationModel consensusModel = ConsensusDeterminationModel.USE_SW;


    // ADVANCED OPTIONS FOLLOW
@ -646,7 +648,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
        long totalRawMismatchSum = determineReadsThatNeedCleaning(reads, refReads, altReads, altAlignmentsToTest, altConsenses, leftmostIndex, reference);

        // use 'Smith-Waterman' to create alternate consenses from reads that mismatch the reference, using totalRawMismatchSum as the random seed
-        if ( !USE_KNOWN_INDELS_ONLY && !NO_SW )
+        if ( consensusModel == ConsensusDeterminationModel.USE_SW )
            generateAlternateConsensesFromReads(altAlignmentsToTest, altConsenses, reference, leftmostIndex);

        // if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n");
@ -723,7 +725,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
                if ( !updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, leftmostIndex) )
                    return;
            }
-            if ( !USE_KNOWN_INDELS_ONLY && !alternateReducesEntropy(altReads, reference, leftmostIndex) ) {
+            if ( consensusModel != ConsensusDeterminationModel.KNOWNS_ONLY && !alternateReducesEntropy(altReads, reference, leftmostIndex) ) {
                if ( statsOutput != null ) {
                    try {
                        statsOutput.write(currentInterval.toString());
@ -885,7 +887,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
                aRead.setAlignerMismatchScore(AlignmentUtils.mismatchingQualities(aRead.getRead(), reference, startOnRef));

                // if it has an indel, let's see if that's the best consensus
-                if ( !USE_KNOWN_INDELS_ONLY && numBlocks == 2 )  {
+                if ( consensusModel != ConsensusDeterminationModel.KNOWNS_ONLY && numBlocks == 2 )  {
                    Consensus c = createAlternateConsensus(startOnRef, aRead.getCigar(), reference, aRead.getReadBases());
                    if ( c != null )
                        altConsenses.add(c);
--- a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java
@ -43,18 +43,33 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
    @Test
    public void testKnownsOnly() {
        WalkerTestSpec spec1 = new WalkerTestSpec(
-                baseCommand + "-knownsOnly -B:indels,vcf " + knownIndels,
+                baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -B:indels,vcf " + knownIndels,
                1,
                Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888"));
        executeTest("realigner known indels only from VCF", spec1);

        WalkerTestSpec spec2 = new WalkerTestSpec(
-                baseCommand + "-knownsOnly -D " + GATKDataLocation + "dbsnp_129_b36.rod",
+                baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod",
                1,
                Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d"));
        executeTest("realigner known indels only from dbsnp", spec2);
    }

+    @Test
+    public void testReadsOnly() {
+        WalkerTestSpec spec1 = new WalkerTestSpec(
+                baseCommand + "--consensusDeterminationModel USE_READS -B:indels,vcf " + knownIndels,
+                1,
+                Arrays.asList(base_md5));
+        executeTest("realigner known indels only from VCF", spec1);
+
+        WalkerTestSpec spec2 = new WalkerTestSpec(
+                baseCommand + "--consensusDeterminationModel USE_READS -D " + GATKDataLocation + "dbsnp_129_b36.rod",
+                1,
+                Arrays.asList("e041186bca9dccf360747c89be8417ad"));
+        executeTest("realigner known indels only from dbsnp", spec2);
+    }
+
    @Test
    public void testLods() {
        HashMap<String, String> e = new HashMap<String, String>();
--- a/scala/qscript/playground/WholeGenomePipeline.scala
+++ b/scala/qscript/playground/WholeGenomePipeline.scala
@ -149,7 +149,7 @@ class WholeGenomePipeline extends QScript {
        clean.targetIntervals = target.out
        clean.rodBind :+= RodBind("dbsnp", "VCF", dbsnp)
        clean.rodBind :+= RodBind("indels", "VCF", indels)
-        clean.doNotUseSW = true
+        clean.consensusDeterminationModel = org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel.USE_READS
        clean.simplifyBAM = true
        clean.bam_compression = 1
        clean.out = tmpBase + ".cleaned.bam"