Misc. fixes upon pull request review

-- DeBruijnAssemblerUnitTest and AlignmentUtilsUnitTest were both left in DEBUG = true mode (bad!); both are reset to DEBUG = false
-- Remove the maxHaplotypesToConsider feature of the HaplotypeCaller (HC), as it's not useful
This commit is contained in:
Mark DePristo 2013-03-20 14:26:37 -04:00
parent d3b756bdc7
commit 3a8f001c27
4 changed files with 10 additions and 35 deletions

View File

@ -92,7 +92,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
private final boolean debugGraphTransformations;
private final PrintStream graphWriter;
private final int minKmer;
private final int maxHaplotypesToConsider;
private final byte minBaseQualityToUseInAssembly;
private final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms;
@ -100,14 +99,13 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
private int PRUNE_FACTOR = 2;
protected DeBruijnAssembler() {
this(false, -1, null, 11, 1000, DEFAULT_MIN_BASE_QUALITY_TO_USE);
this(false, -1, null, 11, DEFAULT_MIN_BASE_QUALITY_TO_USE);
}
public DeBruijnAssembler(final boolean debug,
final int debugGraphTransformations,
final PrintStream graphWriter,
final int minKmer,
final int maxHaplotypesToConsider,
final byte minBaseQualityToUseInAssembly) {
super();
this.debug = debug;
@ -115,7 +113,6 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
this.onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms = debugGraphTransformations;
this.graphWriter = graphWriter;
this.minKmer = minKmer;
this.maxHaplotypesToConsider = maxHaplotypesToConsider;
this.minBaseQualityToUseInAssembly = minBaseQualityToUseInAssembly;
}
@ -371,39 +368,22 @@ public class DeBruijnAssembler extends LocalAssemblyEngine {
}
}
final List<Haplotype> finalHaplotypes = selectHighestScoringHaplotypes(returnHaplotypes);
if ( finalHaplotypes.size() < returnHaplotypes.size() )
logger.info("Found " + finalHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc);
if ( returnHaplotypes.size() < returnHaplotypes.size() )
logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc);
if( debug ) {
if( finalHaplotypes.size() > 1 ) {
System.out.println("Found " + finalHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
if( returnHaplotypes.size() > 1 ) {
System.out.println("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against.");
} else {
System.out.println("Found only the reference haplotype in the assembly graph.");
}
for( final Haplotype h : finalHaplotypes ) {
for( final Haplotype h : returnHaplotypes ) {
System.out.println( h.toString() );
System.out.println( "> Cigar = " + h.getCigar() + " : " + h.getCigar().getReferenceLength() + " score " + h.getScore() );
}
}
return finalHaplotypes;
}
/**
* Select the best scoring haplotypes among all present, returning no more than maxHaplotypesToConsider
*
* @param haplotypes a list of haplotypes to consider
* @return a sublist of the best haplotypes, with size() <= maxHaplotypesToConsider
*/
private List<Haplotype> selectHighestScoringHaplotypes(final List<Haplotype> haplotypes) {
if ( haplotypes.size() <= maxHaplotypesToConsider )
return haplotypes;
else {
final List<Haplotype> sorted = new ArrayList<Haplotype>(haplotypes);
Collections.sort(sorted, new Haplotype.ScoreComparator());
return sorted.subList(0, maxHaplotypesToConsider);
}
return returnHaplotypes;
}
/**

View File

@ -206,10 +206,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
@Argument(fullName="minKmer", shortName="minKmer", doc="Minimum kmer length to use in the assembly graph", required = false)
protected int minKmer = 11;
@Advanced
@Argument(fullName="maxHaplotypesToConsider", shortName="maxHaplotypesToConsider", doc="Maximum number of haplotypes to consider in the likelihood calculation. Setting this number too high can have dramatic performance implications", required = false)
protected int maxHaplotypesToConsider = 100000;
/**
* If this flag is provided, the haplotype caller will include unmapped reads in the assembly and calling
* when these reads occur in the region being analyzed. Typically, for paired end analyses, one pair of the
@ -393,7 +389,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
}
final byte minBaseQualityToUseInAssembly = useLowQualityBasesForAssembly ? (byte)1 : DeBruijnAssembler.DEFAULT_MIN_BASE_QUALITY_TO_USE;
assemblyEngine = new DeBruijnAssembler( DEBUG, debugGraphTransformations, graphWriter, minKmer, maxHaplotypesToConsider, minBaseQualityToUseInAssembly );
assemblyEngine = new DeBruijnAssembler( DEBUG, debugGraphTransformations, graphWriter, minKmer, minBaseQualityToUseInAssembly );
likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine, USE_FILTERED_READ_MAP_FOR_ANNOTATIONS );

View File

@ -61,13 +61,12 @@ import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
public class DeBruijnAssemblerUnitTest extends BaseTest {
private final static boolean DEBUG = true;
private final static boolean DEBUG = false;
@Test(enabled = !DEBUG)
public void testReferenceCycleGraph() {

View File

@ -37,7 +37,7 @@ import org.testng.annotations.Test;
import java.util.*;
public class AlignmentUtilsUnitTest {
private final static boolean DEBUG = true;
private final static boolean DEBUG = false;
private SAMFileHeader header;
/** Basic aligned and mapped read. */