From 4e85416af1829d7d8b04fceb6c444b8ba0a15273 Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 16 Jun 2011 17:35:37 +0000 Subject: [PATCH] [Foiled yet again when trying to do this in git] Slight modifications in the argument structure for the IndelRealigner. Instead of boolean flags -knownsOnly and -doNotUseSW, we now have an enum --consensusDeterminationModel which lets you specify knowns only, also use indels in reads, or also use SW. Please note that the default behavior of IR has not changed at all (and won't for a few more days) - that'll be done in GIT (fingers crossed). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6008 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/walkers/indels/IndelRealigner.java | 20 ++++++++++--------- .../indels/IndelRealignerIntegrationTest.java | 19 ++++++++++++++++-- .../playground/WholeGenomePipeline.scala | 2 +- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 484981e44..9cf55dee5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -76,6 +76,12 @@ public class IndelRealigner extends ReadWalker { public static final String ORIGINAL_POSITION_TAG = "OP"; public static final String PROGRAM_RECORD_NAME = "GATK IndelRealigner"; + public enum ConsensusDeterminationModel { + KNOWNS_ONLY, + USE_READS, + USE_SW + } + @Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true) protected String intervalsFile = null; @@ -89,12 +95,8 @@ public class IndelRealigner extends ReadWalker { protected StingSAMFileWriter writer = null; protected ConstrainedMateFixingManager manager = null; - @Argument(fullName="useOnlyKnownIndels", shortName="knownsOnly", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs for constructing the alternate references.") - protected boolean USE_KNOWN_INDELS_ONLY = false; - - @Hidden - @Argument(fullName="doNotUseSW", shortName="doNotUseSW", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs or indels in the reads for constructing the alternate references.") - protected boolean NO_SW = false; + @Argument(fullName = "consensusDeterminationModel", shortName = "model", doc = "How should we determine the possible alternate consenses? -- in the order of least permissive to most permissive there is KNOWNS_ONLY (use only indels from known indels provided in RODs), USE_READS (additionally use indels already present in the original alignments of the reads), and USE_SW (additionally use 'Smith-Waterman' to generate alternate consenses). The default is USE_SW", required = false) + public ConsensusDeterminationModel consensusModel = ConsensusDeterminationModel.USE_SW; // ADVANCED OPTIONS FOLLOW @@ -646,7 +648,7 @@ public class IndelRealigner extends ReadWalker { long totalRawMismatchSum = determineReadsThatNeedCleaning(reads, refReads, altReads, altAlignmentsToTest, altConsenses, leftmostIndex, reference); // use 'Smith-Waterman' to create alternate consenses from reads that mismatch the reference, using totalRawMismatchSum as the random seed - if ( !USE_KNOWN_INDELS_ONLY && !NO_SW ) + if ( consensusModel == ConsensusDeterminationModel.USE_SW ) generateAlternateConsensesFromReads(altAlignmentsToTest, altConsenses, reference, leftmostIndex); // if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n"); @@ -723,7 +725,7 @@ public class IndelRealigner extends ReadWalker { if ( !updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, leftmostIndex) ) return; } - if ( !USE_KNOWN_INDELS_ONLY && !alternateReducesEntropy(altReads, reference, leftmostIndex) ) { + if ( consensusModel != ConsensusDeterminationModel.KNOWNS_ONLY && !alternateReducesEntropy(altReads, reference, leftmostIndex) ) { if ( statsOutput != null ) { try { statsOutput.write(currentInterval.toString()); @@ -885,7 +887,7 @@ public class IndelRealigner extends ReadWalker { aRead.setAlignerMismatchScore(AlignmentUtils.mismatchingQualities(aRead.getRead(), reference, startOnRef)); // if it has an indel, let's see if that's the best consensus - if ( !USE_KNOWN_INDELS_ONLY && numBlocks == 2 ) { + if ( consensusModel != ConsensusDeterminationModel.KNOWNS_ONLY && numBlocks == 2 ) { Consensus c = createAlternateConsensus(startOnRef, aRead.getCigar(), reference, aRead.getReadBases()); if ( c != null ) altConsenses.add(c); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 20edf9b08..3e8ec3e02 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -43,18 +43,33 @@ public class IndelRealignerIntegrationTest extends WalkerTest { @Test public void testKnownsOnly() { WalkerTestSpec spec1 = new WalkerTestSpec( - baseCommand + "-knownsOnly -B:indels,vcf " + knownIndels, + baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -B:indels,vcf " + knownIndels, 1, Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888")); executeTest("realigner known indels only from VCF", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - baseCommand + "-knownsOnly -D " + GATKDataLocation + "dbsnp_129_b36.rod", + baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod", 1, Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d")); executeTest("realigner known indels only from dbsnp", spec2); } + @Test + public void testReadsOnly() { + WalkerTestSpec spec1 = new WalkerTestSpec( + baseCommand + "--consensusDeterminationModel USE_READS -B:indels,vcf " + knownIndels, + 1, + Arrays.asList(base_md5)); + executeTest("realigner known indels only from VCF", spec1); + + WalkerTestSpec spec2 = new WalkerTestSpec( + baseCommand + "--consensusDeterminationModel USE_READS -D " + GATKDataLocation + "dbsnp_129_b36.rod", + 1, + Arrays.asList("e041186bca9dccf360747c89be8417ad")); + executeTest("realigner known indels only from dbsnp", spec2); + } + @Test public void testLods() { HashMap e = new HashMap(); diff --git a/scala/qscript/playground/WholeGenomePipeline.scala b/scala/qscript/playground/WholeGenomePipeline.scala index 344124f71..088528ac0 100644 --- a/scala/qscript/playground/WholeGenomePipeline.scala +++ b/scala/qscript/playground/WholeGenomePipeline.scala @@ -149,7 +149,7 @@ class WholeGenomePipeline extends QScript { clean.targetIntervals = target.out clean.rodBind :+= RodBind("dbsnp", "VCF", dbsnp) clean.rodBind :+= RodBind("indels", "VCF", indels) - clean.doNotUseSW = true + clean.consensusDeterminationModel = org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel.USE_READS clean.simplifyBAM = true clean.bam_compression = 1 clean.out = tmpBase + ".cleaned.bam"