Swapping in logless Pair HMM for default usage with UG:
-- Changed default HMM model. -- Removed check. -- Changed md5's: PL's in the high 100s change by a point or two due to new implementation. -- Resulting performance improvement is about 30 to 50% less runtime when using -glm INDEL.
This commit is contained in:
parent
4c482eb0f0
commit
f176c854c6
|
|
@ -76,7 +76,8 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private List<Allele> alleleList = new ArrayList<Allele>();
|
||||
|
||||
|
||||
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
protected IndelGenotypeLikelihoodsCalculationModel(final UnifiedArgumentCollection UAC,
|
||||
final Logger logger) {
|
||||
super(UAC, logger);
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY, UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.pairHMM);
|
||||
|
|
@ -85,10 +86,11 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
|
||||
}
|
||||
|
||||
protected static List<Allele> computeConsensusAlleles(ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenomeLocParser locParser, UnifiedArgumentCollection UAC) {
|
||||
protected static List<Allele> computeConsensusAlleles(final ReferenceContext ref,
|
||||
final Map<String, AlignmentContext> contexts,
|
||||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final GenomeLocParser locParser,
|
||||
final UnifiedArgumentCollection UAC) {
|
||||
ConsensusAlleleCounter counter = new ConsensusAlleleCounter(locParser, true, UAC.MIN_INDEL_COUNT_FOR_GENOTYPING, UAC.MIN_INDEL_FRACTION_PER_SAMPLE);
|
||||
return counter.computeConsensusAlleles(ref, contexts, contextType);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
* The PairHMM implementation to use for -glm INDEL genotype likelihood calculations. The various implementations balance a tradeoff of accuracy and runtime.
|
||||
*/
|
||||
@Argument(fullName = "pair_hmm_implementation", shortName = "pairHMM", doc = "The PairHMM implementation to use for -glm INDEL genotype likelihood calculations", required = false)
|
||||
public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.ORIGINAL;
|
||||
public PairHMM.HMM_IMPLEMENTATION pairHMM = PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING;
|
||||
|
||||
/**
|
||||
* The minimum confidence needed in a given base for it to be used in variant calling. Note that the base quality of a base
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.pairhmm.Log10PairHMM;
|
||||
import org.broadinstitute.sting.utils.pairhmm.LoglessPairHMM;
|
||||
import org.broadinstitute.sting.utils.pairhmm.PairHMM;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -116,12 +117,11 @@ public class PairHMMIndelErrorModel {
|
|||
case ORIGINAL:
|
||||
pairHMM = new Log10PairHMM(false);
|
||||
break;
|
||||
case LOGLESS_CACHING: //TODO: still not tested so please do not use yet
|
||||
//pairHMM = new LoglessCachingPairHMM(); //TODO - add it back when the figure out how to use the protected LoglessCachingPairHMM class
|
||||
throw new UserException.BadArgumentValue("pairHMM"," this option (LOGLESS_CACHING in UG) is still under development");
|
||||
//break;
|
||||
case LOGLESS_CACHING:
|
||||
pairHMM = new LoglessPairHMM();
|
||||
break;
|
||||
default:
|
||||
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL, EXACT or LOGLESS_CACHING (the third option is still under development).");
|
||||
throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the UnifiedGenotyper. Acceptable options are ORIGINAL, EXACT or LOGLESS_CACHING.");
|
||||
}
|
||||
|
||||
// fill gap penalty table, affine naive model:
|
||||
|
|
|
|||
|
|
@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "dd28b14d732852bffbba4f22f7697227");
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "98f4d78aad745c6e853b81b2e4e207b4");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() {
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","369ad0ff28bb9ce7974dc2c7343c8737");
|
||||
executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","25902d7a6a0c00c60c2d5845dfaa1a4c");
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
|
|
|
|||
|
|
@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1,
|
||||
Arrays.asList("587bf6bad368ed81189747a84f913ab2"));
|
||||
Arrays.asList("5596851d19582dd1af3901b7d703ae0a"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -96,7 +96,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("70a21812d4dd6b72c44f60c74d508d5b"));
|
||||
Arrays.asList("06c85e8eab08b67244cf38fc785aca22"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue