Enable the new indel likelihood model by default, clean up code, and remove dead arguments; more cleanups to follow. This isn't the final version, but it performs better in all cases than the previous Dindel-based version, so there is no reason to keep the old one around.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5615 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2011-04-12 17:54:46 +00:00
parent 4b7c3af763
commit 3b424fd74d
4 changed files with 32 additions and 107 deletions

View File

@ -62,21 +62,14 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
// todo -cleanup
private HaplotypeIndelErrorModel model;
private PairHMMIndelErrorModel pairModel;
private boolean newLike = false;
private GenomeLoc lastSiteVisited;
private List<Allele> alleleList;
protected DindelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
if (UAC.newlike) {
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.S1, UAC.S2, UAC.dovit);
newLike = true;
}
else
model = new HaplotypeIndelErrorModel(maxReadDeletionLength, UAC.INSERTION_START_PROBABILITY,
UAC.INSERTION_END_PROBABILITY,UAC.ALPHA_DELETION_PROBABILITY,UAC.INDEL_HAPLOTYPE_SIZE, false, UAC.OUTPUT_DEBUG_INDEL_INFO);
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit);
alleleList = new ArrayList<Allele>();
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
@ -338,18 +331,15 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
// assume only one alt allele for now
List<Haplotype> haplotypesInVC;
if (newLike) {
int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
int numPrefBases = ref.getLocus().getStart()-ref.getWindow().getStart()+1;
if (DEBUG)
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
haplotypesInVC = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
ref, hsize, numPrefBases);
}
else
haplotypesInVC = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
ref, HAPLOTYPE_SIZE, 20);
int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
int numPrefBases = ref.getLocus().getStart()-ref.getWindow().getStart()+1;
if (DEBUG)
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
haplotypesInVC = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
ref, hsize, numPrefBases);
// For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
@ -370,10 +360,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
if (pileup != null ) {
if (newLike)
haplotypeLikehoodMatrix = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypesInVC, ref, HAPLOTYPE_SIZE, eventLength);
else
haplotypeLikehoodMatrix = model.computeReadHaplotypeLikelihoods( pileup, haplotypesInVC);
haplotypeLikehoodMatrix = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypesInVC, ref, HAPLOTYPE_SIZE, eventLength);
double[] genotypeLikelihoods = HaplotypeIndelErrorModel.getHaplotypeLikelihoods( haplotypeLikehoodMatrix);

View File

@ -89,45 +89,25 @@ public class UnifiedArgumentCollection {
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false)
public double INDEL_HETEROZYGOSITY = 1.0/8000;
// todo - remove these 3 arguments
@Argument(fullName = "insertionStartProbability", shortName = "insertionStartProbability", doc = "Heterozygosity for indel calling", required = false)
public double INSERTION_START_PROBABILITY = 1e-3;
@Argument(fullName = "insertionEndProbability", shortName = "insertionEndProbability", doc = "Heterozygosity for indel calling", required = false)
public double INSERTION_END_PROBABILITY = 0.5;
@Argument(fullName = "alphaDeletionProbability", shortName = "alphaDeletionProbability", doc = "Heterozygosity for indel calling", required = false)
public double ALPHA_DELETION_PROBABILITY = 1e-3;
@Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false)
public double INDEL_GAP_CONTINUATION_PENALTY = 10.0;
@Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false)
public double INDEL_GAP_OPEN_PENALTY = 40.0;
public double INDEL_GAP_OPEN_PENALTY = 45.0;
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
public int INDEL_HAPLOTYPE_SIZE = 80;
@Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false)
public boolean DO_CONTEXT_DEPENDENT_PENALTIES = false;
public boolean DO_CONTEXT_DEPENDENT_PENALTIES = true;
//gdebug+
@Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
@Hidden
@Argument(fullName = "s1", shortName = "s1", doc = "Output indel debug info", required = false)
public String S1 = null;
@Hidden
@Argument(fullName = "s2", shortName = "s2", doc = "Output indel debug info", required = false)
public String S2 = null;
@Hidden
@Argument(fullName = "dovit", shortName = "dovit", doc = "Output indel debug info", required = false)
public boolean dovit = false;
@Hidden
@Argument(fullName = "newlike", shortName = "newlike", doc = "Output indel debug info", required = false)
public boolean newlike = false;
//gdebug-
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
@ -168,13 +148,7 @@ public class UnifiedArgumentCollection {
// todo- arguments to remove
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
uac.INSERTION_START_PROBABILITY = INSERTION_START_PROBABILITY;
uac.INSERTION_END_PROBABILITY = INSERTION_END_PROBABILITY;
uac.ALPHA_DELETION_PROBABILITY = ALPHA_DELETION_PROBABILITY;
uac.S1 = S1;
uac.S2 = S2;
uac.dovit = dovit;
uac.newlike = newlike;
return uac;
}

View File

@ -83,16 +83,15 @@ public class PairHMMIndelErrorModel {
private static final double baseMatchArray[];
private static final double baseMismatchArray[];
private final static double LOG_ONE_THIRD;
private final static double LOG_ONE_HALF;
private final static double END_GAP_COST;
private static final int START_HRUN_GAP_IDX = 4;
private static final int MAX_HRUN_GAP_IDX = 20;
private static final double MIN_GAP_OPEN_PENALTY = 20.0;
private static final double MIN_GAP_CONT_PENALTY = 6.0;
private static final double GAP_PENALTY_HRUN_STEP = 2.0; // each increase in hrun decreases gap penalty by this.
private static final double MIN_GAP_OPEN_PENALTY = 30.0;
private static final double MIN_GAP_CONT_PENALTY = 10.0;
private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this.
private boolean doViterbi = false;
@ -100,17 +99,11 @@ public class PairHMMIndelErrorModel {
private final boolean useAffineGapModel = true;
private boolean doContextDependentPenalties = false;
private String s1;
private String s2;
private final double[] GAP_OPEN_PROB_TABLE;
private final double[] GAP_CONT_PROB_TABLE;
// private BAQ baq;
static {
LOG_ONE_THIRD= -Math.log10(3.0);
LOG_ONE_HALF= -Math.log10(2.0);
END_GAP_COST = LOG_ONE_HALF;
@ -125,10 +118,8 @@ public class PairHMMIndelErrorModel {
}
}
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, String s1, String s2, boolean dovit) {
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit) {
this(indelGOP, indelGCP, deb, doCDP);
this.s1 = s1;
this.s2 = s2;
this.doViterbi = dovit;
}
@ -168,8 +159,6 @@ public class PairHMMIndelErrorModel {
GAP_OPEN_PROB_TABLE[i] = gop;
GAP_CONT_PROB_TABLE[i] = gcp;
}
// baq = new BAQ(Math.pow(10.0,logGapOpenProbability),Math.pow(10.0,logGapContinuationProbability),
// 3,(byte)BASE_QUAL_THRESHOLD,true);
}
@ -654,13 +643,14 @@ public class PairHMMIndelErrorModel {
numStartSoftClippedBases = read.getAlignmentStart()- read.getUnclippedStart();
numEndSoftClippedBases = read.getUnclippedEnd()- read.getAlignmentEnd();
if (eventLength > 0) {
/*if (eventLength > 0) */
{
if ((read.getAlignmentStart()>=eventStartPos-eventLength && read.getAlignmentStart() <= eventStartPos+1) ||
(read.getAlignmentEnd() >= eventStartPos && read.getAlignmentEnd() <= eventStartPos + eventLength)) {
numStartSoftClippedBases = 0;
numEndSoftClippedBases = 0;
}
}
}
byte[] unclippedReadBases, unclippedReadQuals;
@ -752,34 +742,8 @@ public class PairHMMIndelErrorModel {
long indStart = start - haplotype.getStartPosition();
long indStop = stop - haplotype.getStartPosition();
//long indStart = 0;
//long indStop = 400;
byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBasesAsBytes(),
(int)indStart, (int)indStop);
//byte[] haplotypeBases = haplotype.getBasesAsBytes();
//BAQ.BAQCalculationResult baqRes = baq.calcBAQFromHMM(read, haplotypeBases, (int)(start - readStart));
/* if (s1 != null && s2 != null) {
haplotypeBases = s2.getBytes();
readBases = s1.getBytes();
readQuals = null;
readQuals = new byte[readBases.length];
for (int i=0; i < readQuals.length; i++)
readQuals[i] = (byte)30;
byte[] iq = new byte[readBases.length];
byte[] q = new byte[readBases.length];
int[] state = new int[readBases.length];
int a=baq.hmm_glocal(haplotypeBases, readBases, 0, readBases.length, iq, state, q);
System.out.println("BAQ State:");
for (int i=0; i < state.length; i++)
System.out.format("%d ",state[i]);
System.out.println();
System.out.println("BAQ Q:");
for (int i=0; i < q.length; i++)
System.out.format("%d ",(int)q[i]);
System.out.println();
} */
if (DEBUG) {
System.out.println("Haplotype to test:");

View File

@ -27,13 +27,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("9895541f0395ddb9977abb7d8274831e"));
Arrays.asList("a2218244df08dce96d04f8736a6de796"));
executeTest("test MultiSample Pilot1", spec);
}
@Test
public void testMultiSamplePilot2AndRecallingWithAlleles() {
String md5 = "2a8461e846a437ddcac9f1be185d0a4b";
String md5 = "d50bc17991d0dfd03ab2810a1cfeec85";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1,
@ -75,7 +75,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "92be4e11ee4660b67b32ae466d0651b0";
private final static String COMPRESSED_OUTPUT_MD5 = "211e4ed67cd7bd3501555d829548da48";
@Test
public void testCompressedOutput() {
@ -105,7 +105,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "f56fdf5c6a2db85031a3cece37e12a56";
String md5 = "f83a33a1ecc350cae0c002e4a43a7861";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -184,8 +184,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "9ed7893a36b27d5e63b6ee021918a4dd" );
e.put( 1.0 / 1850, "533bd796d2345a00536bf3164c4f75e1" );
e.put( 0.01, "4fb6e66895bbaa9a3b9af91c985a2782" );
e.put( 1.0 / 1850, "1397d29d924e74f43de466927a9f7915" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -209,7 +209,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("1cdc0a1a40495cd17461280a1a37d429"));
Arrays.asList("b44ddfbeb9fc413e44194799b3e3bbf8"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -228,7 +228,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("940b42d14378e41d45e5e25a9b5aaebc"));
Arrays.asList("164e25d61ef562ae863871d4ec7cb387"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -248,7 +248,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -glm INDEL" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("d4c15c60ffefe754d83e1164e78f2b6a"));
Arrays.asList("ffeea8550a707871a68f6707159f4ea9"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -glm INDEL -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("599220ba0cc5d8a32e4952fca85fd080"));
Arrays.asList("b6546f6e8f092b3c39cffec22aa47bcb"));
executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
}
@ -277,7 +277,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -glm INDEL" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("4c8c2ac4e024f70779465fb14c5d8c8a"));
Arrays.asList("897057070aa2e3651d91625a58c5ec4b"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}