From 3a8f001c276808dbb78e199202e7174d66e5e6c6 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 20 Mar 2013 14:26:37 -0400 Subject: [PATCH] Misc. fixes upon pull request review -- DeBruijnAssemblerUnitTest and AlignmentUtilsUnitTest were both in DEBUG = true mode (bad!) -- Remove the maxHaplotypesToConsider feature of HC as it's not useful --- .../haplotypecaller/DeBruijnAssembler.java | 34 ++++--------------- .../haplotypecaller/HaplotypeCaller.java | 6 +--- .../DeBruijnAssemblerUnitTest.java | 3 +- .../utils/sam/AlignmentUtilsUnitTest.java | 2 +- 4 files changed, 10 insertions(+), 35 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java index 6d295ff97..f3db422e7 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java @@ -92,7 +92,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { private final boolean debugGraphTransformations; private final PrintStream graphWriter; private final int minKmer; - private final int maxHaplotypesToConsider; private final byte minBaseQualityToUseInAssembly; private final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms; @@ -100,14 +99,13 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { private int PRUNE_FACTOR = 2; protected DeBruijnAssembler() { - this(false, -1, null, 11, 1000, DEFAULT_MIN_BASE_QUALITY_TO_USE); + this(false, -1, null, 11, DEFAULT_MIN_BASE_QUALITY_TO_USE); } public DeBruijnAssembler(final boolean debug, final int debugGraphTransformations, final PrintStream graphWriter, final int minKmer, - final int maxHaplotypesToConsider, final byte minBaseQualityToUseInAssembly) { super(); this.debug = debug; @@ -115,7 +113,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { this.onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms = debugGraphTransformations; this.graphWriter = graphWriter; this.minKmer = minKmer; - this.maxHaplotypesToConsider = maxHaplotypesToConsider; this.minBaseQualityToUseInAssembly = minBaseQualityToUseInAssembly; } @@ -371,39 +368,22 @@ public class DeBruijnAssembler extends LocalAssemblyEngine { } } - final List finalHaplotypes = selectHighestScoringHaplotypes(returnHaplotypes); - if ( finalHaplotypes.size() < returnHaplotypes.size() ) - logger.info("Found " + finalHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc); + if ( returnHaplotypes.size() < returnHaplotypes.size() ) + logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc); if( debug ) { - if( finalHaplotypes.size() > 1 ) { - System.out.println("Found " + finalHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against."); + if( returnHaplotypes.size() > 1 ) { + System.out.println("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against."); } else { System.out.println("Found only the reference haplotype in the assembly graph."); } - for( final Haplotype h : finalHaplotypes ) { + for( final Haplotype h : returnHaplotypes ) { System.out.println( h.toString() ); System.out.println( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() ); } } - return finalHaplotypes; - } - - /** - * Select the best scoring haplotypes among all present, returning no more than maxHaplotypesToConsider - * - * @param haplotypes a list of haplotypes to consider - * @return a sublist of the best haplotypes, with size() <= maxHaplotypesToConsider - */ - private List selectHighestScoringHaplotypes(final List haplotypes) { - if ( haplotypes.size() <= maxHaplotypesToConsider ) - return haplotypes; - else { - final List sorted = new ArrayList(haplotypes); - Collections.sort(sorted, new Haplotype.ScoreComparator()); - return sorted.subList(0, maxHaplotypesToConsider); - } + return returnHaplotypes; } /** diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 7bec4bee5..31751d8f0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -206,10 +206,6 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false) protected int minKmer = 11; - @Advanced - @Argument(fullName="maxHaplotypesToConsider", shortName="maxHaplotypesToConsider", doc="Maximum number of haplotypes to consider in the likelihood calculation. Setting this number too high can have dramatic performance implications", required = false) - protected int maxHaplotypesToConsider = 100000; - /** * If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling * when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the @@ -393,7 +389,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } final byte minBaseQualityToUseInAssembly = useLowQualityBasesForAssembly ? (byte)1 : DeBruijnAssembler.DEFAULT_MIN_BASE_QUALITY_TO_USE; - assemblyEngine = new DeBruijnAssembler( DEBUG, debugGraphTransformations, graphWriter, minKmer, maxHaplotypesToConsider, minBaseQualityToUseInAssembly ); + assemblyEngine = new DeBruijnAssembler( DEBUG, debugGraphTransformations, graphWriter, minKmer, minBaseQualityToUseInAssembly ); likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM ); genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java index fa581f7fd..663d619a8 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java @@ -61,13 +61,12 @@ import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; -import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.*; public class DeBruijnAssemblerUnitTest extends BaseTest { - private final static boolean DEBUG = true; + private final static boolean DEBUG = false; @Test(enabled = !DEBUG) public void testReferenceCycleGraph() { diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java index 660dadc00..125450257 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/AlignmentUtilsUnitTest.java @@ -37,7 +37,7 @@ import org.testng.annotations.Test; import java.util.*; public class AlignmentUtilsUnitTest { - private final static boolean DEBUG = true; + private final static boolean DEBUG = false; private SAMFileHeader header; /** Basic aligned and mapped read. */