diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 484981e44..9cf55dee5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -76,6 +76,12 @@ public class IndelRealigner extends ReadWalker { public static final String ORIGINAL_POSITION_TAG = "OP"; public static final String PROGRAM_RECORD_NAME = "GATK IndelRealigner"; + public enum ConsensusDeterminationModel { + KNOWNS_ONLY, + USE_READS, + USE_SW + } + @Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true) protected String intervalsFile = null; @@ -89,12 +95,8 @@ public class IndelRealigner extends ReadWalker { protected StingSAMFileWriter writer = null; protected ConstrainedMateFixingManager manager = null; - @Argument(fullName="useOnlyKnownIndels", shortName="knownsOnly", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs for constructing the alternate references.") - protected boolean USE_KNOWN_INDELS_ONLY = false; - - @Hidden - @Argument(fullName="doNotUseSW", shortName="doNotUseSW", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs or indels in the reads for constructing the alternate references.") - protected boolean NO_SW = false; + @Argument(fullName = "consensusDeterminationModel", shortName = "model", doc = "How should we determine the possible alternate consenses? -- in the order of least permissive to most permissive there is KNOWNS_ONLY (use only indels from known indels provided in RODs), USE_READS (additionally use indels already present in the original alignments of the reads), and USE_SW (additionally use 'Smith-Waterman' to generate alternate consenses). The default is USE_SW", required = false) + public ConsensusDeterminationModel consensusModel = ConsensusDeterminationModel.USE_SW; // ADVANCED OPTIONS FOLLOW @@ -646,7 +648,7 @@ public class IndelRealigner extends ReadWalker { long totalRawMismatchSum = determineReadsThatNeedCleaning(reads, refReads, altReads, altAlignmentsToTest, altConsenses, leftmostIndex, reference); // use 'Smith-Waterman' to create alternate consenses from reads that mismatch the reference, using totalRawMismatchSum as the random seed - if ( !USE_KNOWN_INDELS_ONLY && !NO_SW ) + if ( consensusModel == ConsensusDeterminationModel.USE_SW ) generateAlternateConsensesFromReads(altAlignmentsToTest, altConsenses, reference, leftmostIndex); // if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n"); @@ -723,7 +725,7 @@ public class IndelRealigner extends ReadWalker { if ( !updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, leftmostIndex) ) return; } - if ( !USE_KNOWN_INDELS_ONLY && !alternateReducesEntropy(altReads, reference, leftmostIndex) ) { + if ( consensusModel != ConsensusDeterminationModel.KNOWNS_ONLY && !alternateReducesEntropy(altReads, reference, leftmostIndex) ) { if ( statsOutput != null ) { try { statsOutput.write(currentInterval.toString()); @@ -885,7 +887,7 @@ public class IndelRealigner extends ReadWalker { aRead.setAlignerMismatchScore(AlignmentUtils.mismatchingQualities(aRead.getRead(), reference, startOnRef)); // if it has an indel, let's see if that's the best consensus - if ( !USE_KNOWN_INDELS_ONLY && numBlocks == 2 ) { + if ( consensusModel != ConsensusDeterminationModel.KNOWNS_ONLY && numBlocks == 2 ) { Consensus c = createAlternateConsensus(startOnRef, aRead.getCigar(), reference, aRead.getReadBases()); if ( c != null ) altConsenses.add(c); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 20edf9b08..3e8ec3e02 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -43,18 +43,33 @@ public class IndelRealignerIntegrationTest extends WalkerTest { @Test public void testKnownsOnly() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseCommand + "-knownsOnly -B:indels,vcf " + knownIndels, + baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -B:indels,vcf " + knownIndels, 1, Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888")); executeTest("realigner known indels only from VCF", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseCommand + "-knownsOnly -D " + GATKDataLocation + "dbsnp_129_b36.rod", + baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod", 1, Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d")); executeTest("realigner known indels only from dbsnp", spec2); } + @Test + public void testReadsOnly() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommand + "--consensusDeterminationModel USE_READS -B:indels,vcf " + knownIndels, + 1, + Arrays.asList(base_md5)); + executeTest("realigner known indels only from VCF", spec1); + + WalkerTestSpec spec2 = new WalkerTestSpec( + baseCommand + "--consensusDeterminationModel USE_READS -D " + GATKDataLocation + "dbsnp_129_b36.rod", + 1, + Arrays.asList("e041186bca9dccf360747c89be8417ad")); + executeTest("realigner known indels only from dbsnp", spec2); + } + @Test public void testLods() { HashMap e = new HashMap(); diff --git a/scala/qscript/playground/WholeGenomePipeline.scala b/scala/qscript/playground/WholeGenomePipeline.scala index 344124f71..088528ac0 100644 --- a/scala/qscript/playground/WholeGenomePipeline.scala +++ b/scala/qscript/playground/WholeGenomePipeline.scala @@ -149,7 +149,7 @@ class WholeGenomePipeline extends QScript { clean.targetIntervals = target.out clean.rodBind :+= RodBind("dbsnp", "VCF", dbsnp) clean.rodBind :+= RodBind("indels", "VCF", indels) - clean.doNotUseSW = true + clean.consensusDeterminationModel = org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel.USE_READS clean.simplifyBAM = true clean.bam_compression = 1 clean.out = tmpBase + ".cleaned.bam"