[Foiled yet again when trying to do this in git] Slight modifications in the argument structure for the IndelRealigner. Instead of boolean flags -knownsOnly and -doNotUseSW, we now have an enum --consensusDeterminationModel which lets you specify knowns only, also use indels in reads, or also use SW. Please note that the default behavior of IR has not changed at all (and won't for a few more days) - that'll be done in GIT (fingers crossed).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6008 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2011-06-16 17:35:37 +00:00
parent 4304fc4862
commit 4e85416af1
3 changed files with 29 additions and 12 deletions

View File

@ -76,6 +76,12 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
public static final String ORIGINAL_POSITION_TAG = "OP"; public static final String ORIGINAL_POSITION_TAG = "OP";
public static final String PROGRAM_RECORD_NAME = "GATK IndelRealigner"; public static final String PROGRAM_RECORD_NAME = "GATK IndelRealigner";
public enum ConsensusDeterminationModel {
KNOWNS_ONLY,
USE_READS,
USE_SW
}
@Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true) @Input(fullName="targetIntervals", shortName="targetIntervals", doc="intervals file output from RealignerTargetCreator", required=true)
protected String intervalsFile = null; protected String intervalsFile = null;
@ -89,12 +95,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
protected StingSAMFileWriter writer = null; protected StingSAMFileWriter writer = null;
protected ConstrainedMateFixingManager manager = null; protected ConstrainedMateFixingManager manager = null;
@Argument(fullName="useOnlyKnownIndels", shortName="knownsOnly", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs for constructing the alternate references.") @Argument(fullName = "consensusDeterminationModel", shortName = "model", doc = "How should we determine the possible alternate consenses? -- in the order of least permissive to most permissive there is KNOWNS_ONLY (use only indels from known indels provided in RODs), USE_READS (additionally use indels already present in the original alignments of the reads), and USE_SW (additionally use 'Smith-Waterman' to generate alternate consenses). The default is USE_SW", required = false)
protected boolean USE_KNOWN_INDELS_ONLY = false; public ConsensusDeterminationModel consensusModel = ConsensusDeterminationModel.USE_SW;
@Hidden
@Argument(fullName="doNotUseSW", shortName="doNotUseSW", required=false, doc="Don't run 'Smith-Waterman' to generate alternate consenses; use only known indels provided as RODs or indels in the reads for constructing the alternate references.")
protected boolean NO_SW = false;
// ADVANCED OPTIONS FOLLOW // ADVANCED OPTIONS FOLLOW
@ -646,7 +648,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
long totalRawMismatchSum = determineReadsThatNeedCleaning(reads, refReads, altReads, altAlignmentsToTest, altConsenses, leftmostIndex, reference); long totalRawMismatchSum = determineReadsThatNeedCleaning(reads, refReads, altReads, altAlignmentsToTest, altConsenses, leftmostIndex, reference);
// use 'Smith-Waterman' to create alternate consenses from reads that mismatch the reference, using totalRawMismatchSum as the random seed // use 'Smith-Waterman' to create alternate consenses from reads that mismatch the reference, using totalRawMismatchSum as the random seed
if ( !USE_KNOWN_INDELS_ONLY && !NO_SW ) if ( consensusModel == ConsensusDeterminationModel.USE_SW )
generateAlternateConsensesFromReads(altAlignmentsToTest, altConsenses, reference, leftmostIndex); generateAlternateConsensesFromReads(altAlignmentsToTest, altConsenses, reference, leftmostIndex);
// if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n"); // if ( debugOn ) System.out.println("------\nChecking consenses...\n--------\n");
@ -723,7 +725,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
if ( !updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, leftmostIndex) ) if ( !updateRead(bestConsensus.cigar, bestConsensus.positionOnReference, indexPair.second, aRead, leftmostIndex) )
return; return;
} }
if ( !USE_KNOWN_INDELS_ONLY && !alternateReducesEntropy(altReads, reference, leftmostIndex) ) { if ( consensusModel != ConsensusDeterminationModel.KNOWNS_ONLY && !alternateReducesEntropy(altReads, reference, leftmostIndex) ) {
if ( statsOutput != null ) { if ( statsOutput != null ) {
try { try {
statsOutput.write(currentInterval.toString()); statsOutput.write(currentInterval.toString());
@ -885,7 +887,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
aRead.setAlignerMismatchScore(AlignmentUtils.mismatchingQualities(aRead.getRead(), reference, startOnRef)); aRead.setAlignerMismatchScore(AlignmentUtils.mismatchingQualities(aRead.getRead(), reference, startOnRef));
// if it has an indel, let's see if that's the best consensus // if it has an indel, let's see if that's the best consensus
if ( !USE_KNOWN_INDELS_ONLY && numBlocks == 2 ) { if ( consensusModel != ConsensusDeterminationModel.KNOWNS_ONLY && numBlocks == 2 ) {
Consensus c = createAlternateConsensus(startOnRef, aRead.getCigar(), reference, aRead.getReadBases()); Consensus c = createAlternateConsensus(startOnRef, aRead.getCigar(), reference, aRead.getReadBases());
if ( c != null ) if ( c != null )
altConsenses.add(c); altConsenses.add(c);

View File

@ -43,18 +43,33 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
@Test @Test
public void testKnownsOnly() { public void testKnownsOnly() {
WalkerTestSpec spec1 = new WalkerTestSpec( WalkerTestSpec spec1 = new WalkerTestSpec(
baseCommand + "-knownsOnly -B:indels,vcf " + knownIndels, baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -B:indels,vcf " + knownIndels,
1, 1,
Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888")); Arrays.asList("3dd5d2c9931b375455af0bff1a2c4888"));
executeTest("realigner known indels only from VCF", spec1); executeTest("realigner known indels only from VCF", spec1);
WalkerTestSpec spec2 = new WalkerTestSpec( WalkerTestSpec spec2 = new WalkerTestSpec(
baseCommand + "-knownsOnly -D " + GATKDataLocation + "dbsnp_129_b36.rod", baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -D " + GATKDataLocation + "dbsnp_129_b36.rod",
1, 1,
Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d")); Arrays.asList("78850024ac9ff3ba51b6f097c7041c1d"));
executeTest("realigner known indels only from dbsnp", spec2); executeTest("realigner known indels only from dbsnp", spec2);
} }
@Test
public void testReadsOnly() {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseCommand + "--consensusDeterminationModel USE_READS -B:indels,vcf " + knownIndels,
1,
Arrays.asList(base_md5));
executeTest("realigner known indels only from VCF", spec1);
WalkerTestSpec spec2 = new WalkerTestSpec(
baseCommand + "--consensusDeterminationModel USE_READS -D " + GATKDataLocation + "dbsnp_129_b36.rod",
1,
Arrays.asList("e041186bca9dccf360747c89be8417ad"));
executeTest("realigner known indels only from dbsnp", spec2);
}
@Test @Test
public void testLods() { public void testLods() {
HashMap<String, String> e = new HashMap<String, String>(); HashMap<String, String> e = new HashMap<String, String>();

View File

@ -149,7 +149,7 @@ class WholeGenomePipeline extends QScript {
clean.targetIntervals = target.out clean.targetIntervals = target.out
clean.rodBind :+= RodBind("dbsnp", "VCF", dbsnp) clean.rodBind :+= RodBind("dbsnp", "VCF", dbsnp)
clean.rodBind :+= RodBind("indels", "VCF", indels) clean.rodBind :+= RodBind("indels", "VCF", indels)
clean.doNotUseSW = true clean.consensusDeterminationModel = org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel.USE_READS
clean.simplifyBAM = true clean.simplifyBAM = true
clean.bam_compression = 1 clean.bam_compression = 1
clean.out = tmpBase + ".cleaned.bam" clean.out = tmpBase + ".cleaned.bam"