Misc. fixes upon pull request review

-- DeBruijnAssemblerUnitTest and AlignmentUtilsUnitTest were both in DEBUG = true mode (bad!)
-- Remove the maxHaplotypesToConsider feature of HaplotypeCaller (HC), as it's not useful
This commit is contained in:
Mark DePristo 2013-03-20 14:26:37 -04:00
parent d3b756bdc7
commit 3a8f001c27
4 changed files with 10 additions and 35 deletions

View File

@ -92,7 +92,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
private final boolean debugGraphTransformations; private final boolean debugGraphTransformations;
private final PrintStream graphWriter; private final PrintStream graphWriter;
private final int minKmer; private final int minKmer;
private final int maxHaplotypesToConsider;
private final byte minBaseQualityToUseInAssembly; private final byte minBaseQualityToUseInAssembly;
private final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms; private final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms;
@ -100,14 +99,13 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
private int PRUNE_FACTOR = 2; private int PRUNE_FACTOR = 2;
protected DeBruijnAssembler() { protected DeBruijnAssembler() {
this(false, -1, null, 11, 1000, DEFAULT_MIN_BASE_QUALITY_TO_USE); this(false, -1, null, 11, DEFAULT_MIN_BASE_QUALITY_TO_USE);
} }
public DeBruijnAssembler(final boolean debug, public DeBruijnAssembler(final boolean debug,
final int debugGraphTransformations, final int debugGraphTransformations,
final PrintStream graphWriter, final PrintStream graphWriter,
final int minKmer, final int minKmer,
final int maxHaplotypesToConsider,
final byte minBaseQualityToUseInAssembly) { final byte minBaseQualityToUseInAssembly) {
super(); super();
this.debug = debug; this.debug = debug;
@ -115,7 +113,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
this.onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms = debugGraphTransformations; this.onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms = debugGraphTransformations;
this.graphWriter = graphWriter; this.graphWriter = graphWriter;
this.minKmer = minKmer; this.minKmer = minKmer;
this.maxHaplotypesToConsider = maxHaplotypesToConsider;
this.minBaseQualityToUseInAssembly = minBaseQualityToUseInAssembly; this.minBaseQualityToUseInAssembly = minBaseQualityToUseInAssembly;
} }
@ -371,39 +368,22 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
} }
} }
final List<Haplotype> finalHaplotypes = selectHighestScoringHaplotypes(returnHaplotypes); if ( returnHaplotypes.size() < returnHaplotypes.size() )
if ( finalHaplotypes.size() < returnHaplotypes.size() ) logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc);
logger.info("Found " + finalHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc);
if( debug ) { if( debug ) {
if( finalHaplotypes.size() > 1 ) { if( returnHaplotypes.size() > 1 ) {
System.out.println("Found " + finalHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against."); System.out.println("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
} else { } else {
System.out.println("Found only the reference haplotype in the assembly graph."); System.out.println("Found only the reference haplotype in the assembly graph.");
} }
for( final Haplotype h : finalHaplotypes ) { for( final Haplotype h : returnHaplotypes ) {
System.out.println( h.toString() ); System.out.println( h.toString() );
System.out.println( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() ); System.out.println( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() );
} }
} }
return finalHaplotypes; return returnHaplotypes;
}
/**
* Select the best scoring haplotypes among all present, returning no more than maxHaplotypesToConsider
*
* @param haplotypes a list of haplotypes to consider
* @return a sublist of the best haplotypes, with size() <= maxHaplotypesToConsider
*/
private List<Haplotype> selectHighestScoringHaplotypes(final List<Haplotype> haplotypes) {
if ( haplotypes.size() <= maxHaplotypesToConsider )
return haplotypes;
else {
final List<Haplotype> sorted = new ArrayList<Haplotype>(haplotypes);
Collections.sort(sorted, new Haplotype.ScoreComparator());
return sorted.subList(0, maxHaplotypesToConsider);
}
} }
/** /**

View File

@ -206,10 +206,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false) @Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
protected int minKmer = 11; protected int minKmer = 11;
@Advanced
@Argument(fullName="maxHaplotypesToConsider", shortName="maxHaplotypesToConsider", doc="Maximum number of haplotypes to consider in the likelihood calculation. Setting this number too high can have dramatic performance implications", required = false)
protected int maxHaplotypesToConsider = 100000;
/** /**
* If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling * If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling
* when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the * when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the
@ -393,7 +389,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
} }
final byte minBaseQualityToUseInAssembly = useLowQualityBasesForAssembly ? (byte)1 : DeBruijnAssembler.DEFAULT_MIN_BASE_QUALITY_TO_USE; final byte minBaseQualityToUseInAssembly = useLowQualityBasesForAssembly ? (byte)1 : DeBruijnAssembler.DEFAULT_MIN_BASE_QUALITY_TO_USE;
assemblyEngine = new DeBruijnAssembler( DEBUG, debugGraphTransformations, graphWriter, minKmer, maxHaplotypesToConsider, minBaseQualityToUseInAssembly ); assemblyEngine = new DeBruijnAssembler( DEBUG, debugGraphTransformations, graphWriter, minKmer, minBaseQualityToUseInAssembly );
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM ); likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ); genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );

View File

@ -61,13 +61,12 @@ import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.util.*; import java.util.*;
public class DeBruijnAssemblerUnitTest extends BaseTest { public class DeBruijnAssemblerUnitTest extends BaseTest {
private final static boolean DEBUG = true; private final static boolean DEBUG = false;
@Test(enabled = !DEBUG) @Test(enabled = !DEBUG)
public void testReferenceCycleGraph() { public void testReferenceCycleGraph() {

View File

@ -37,7 +37,7 @@ import org.testng.annotations.Test;
import java.util.*; import java.util.*;
public class AlignmentUtilsUnitTest { public class AlignmentUtilsUnitTest {
private final static boolean DEBUG = true; private final static boolean DEBUG = false;
private SAMFileHeader header; private SAMFileHeader header;
/** Basic aligned and mapped read. */ /** Basic aligned and mapped read. */