diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java index d83cf4a18..e37b0ccc5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java @@ -62,21 +62,14 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo // todo -cleanup private HaplotypeIndelErrorModel model; private PairHMMIndelErrorModel pairModel; - private boolean newLike = false; private GenomeLoc lastSiteVisited; private List alleleList; protected DindelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); - if (UAC.newlike) { pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY, - UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.S1, UAC.S2, UAC.dovit); - newLike = true; - } - else - model = new HaplotypeIndelErrorModel(maxReadDeletionLength, UAC.INSERTION_START_PROBABILITY, - UAC.INSERTION_END_PROBABILITY,UAC.ALPHA_DELETION_PROBABILITY,UAC.INDEL_HAPLOTYPE_SIZE, false, UAC.OUTPUT_DEBUG_INDEL_INFO); + UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit); alleleList = new ArrayList(); getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING; @@ -338,18 +331,15 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo // assume only one alt allele for now List haplotypesInVC; - if (newLike) { - int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1; - int numPrefBases = ref.getLocus().getStart()-ref.getWindow().getStart()+1; - if (DEBUG) - System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength, - (int)ref.getWindow().size(), loc.getStart(), numPrefBases); - haplotypesInVC = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(), - ref, hsize, numPrefBases); - } - else - haplotypesInVC = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(), - ref, HAPLOTYPE_SIZE, 20); + + int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1; + int numPrefBases = ref.getLocus().getStart()-ref.getWindow().getStart()+1; + if (DEBUG) + System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength, + (int)ref.getWindow().size(), loc.getStart(), numPrefBases); + + haplotypesInVC = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(), + ref, hsize, numPrefBases); // For each sample, get genotype likelihoods based on pileup // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. @@ -370,10 +360,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo if (pileup != null ) { - if (newLike) - haplotypeLikehoodMatrix = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypesInVC, ref, HAPLOTYPE_SIZE, eventLength); - else - haplotypeLikehoodMatrix = model.computeReadHaplotypeLikelihoods( pileup, haplotypesInVC); + haplotypeLikehoodMatrix = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypesInVC, ref, HAPLOTYPE_SIZE, eventLength); double[] genotypeLikelihoods = HaplotypeIndelErrorModel.getHaplotypeLikelihoods( haplotypeLikehoodMatrix); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 09c3a2a5c..0e2ecb378 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -89,45 +89,25 @@ public class UnifiedArgumentCollection { @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling", required = false) public double INDEL_HETEROZYGOSITY = 1.0/8000; - // todo - remove these 3 arguments - @Argument(fullName = "insertionStartProbability", shortName = "insertionStartProbability", doc = "Heterozygosity for indel calling", required = false) - public double INSERTION_START_PROBABILITY = 1e-3; - - @Argument(fullName = "insertionEndProbability", shortName = "insertionEndProbability", doc = "Heterozygosity for indel calling", required = false) - public double INSERTION_END_PROBABILITY = 0.5; - - @Argument(fullName = "alphaDeletionProbability", shortName = "alphaDeletionProbability", doc = "Heterozygosity for indel calling", required = false) - public double ALPHA_DELETION_PROBABILITY = 1e-3; - - @Argument(fullName = "indelGapContinuationPenalty", shortName = "indelGCP", doc = "Indel gap continuation penalty", required = false) public double INDEL_GAP_CONTINUATION_PENALTY = 10.0; @Argument(fullName = "indelGapOpenPenalty", shortName = "indelGOP", doc = "Indel gap open penalty", required = false) - public double INDEL_GAP_OPEN_PENALTY = 40.0; + public double INDEL_GAP_OPEN_PENALTY = 45.0; @Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false) public int INDEL_HAPLOTYPE_SIZE = 80; @Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false) - public boolean DO_CONTEXT_DEPENDENT_PENALTIES = false; + public boolean DO_CONTEXT_DEPENDENT_PENALTIES = true; //gdebug+ @Hidden @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false) public boolean OUTPUT_DEBUG_INDEL_INFO = false; @Hidden - @Argument(fullName = "s1", shortName = "s1", doc = "Output indel debug info", required = false) - public String S1 = null; - @Hidden - @Argument(fullName = "s2", shortName = "s2", doc = "Output indel debug info", required = false) - public String S2 = null; - @Hidden @Argument(fullName = "dovit", shortName = "dovit", doc = "Output indel debug info", required = false) public boolean dovit = false; @Hidden - @Argument(fullName = "newlike", shortName = "newlike", doc = "Output indel debug info", required = false) - public boolean newlike = false; - //gdebug- - + @Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false) public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL; @@ -168,13 +148,7 @@ public class UnifiedArgumentCollection { // todo- arguments to remove uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT; - uac.INSERTION_START_PROBABILITY = INSERTION_START_PROBABILITY; - uac.INSERTION_END_PROBABILITY = INSERTION_END_PROBABILITY; - uac.ALPHA_DELETION_PROBABILITY = ALPHA_DELETION_PROBABILITY; - uac.S1 = S1; - uac.S2 = S2; uac.dovit = dovit; - uac.newlike = newlike; return uac; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index e66be1e14..afbb76581 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -83,16 +83,15 @@ public class PairHMMIndelErrorModel { private static final double baseMatchArray[]; private static final double baseMismatchArray[]; - private final static double LOG_ONE_THIRD; private final static double LOG_ONE_HALF; private final static double END_GAP_COST; private static final int START_HRUN_GAP_IDX = 4; private static final int MAX_HRUN_GAP_IDX = 20; - private static final double MIN_GAP_OPEN_PENALTY = 20.0; - private static final double MIN_GAP_CONT_PENALTY = 6.0; - private static final double GAP_PENALTY_HRUN_STEP = 2.0; // each increase in hrun decreases gap penalty by this. + private static final double MIN_GAP_OPEN_PENALTY = 30.0; + private static final double MIN_GAP_CONT_PENALTY = 10.0; + private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this. private boolean doViterbi = false; @@ -100,17 +99,11 @@ public class PairHMMIndelErrorModel { private final boolean useAffineGapModel = true; private boolean doContextDependentPenalties = false; - private String s1; - private String s2; - - private final double[] GAP_OPEN_PROB_TABLE; private final double[] GAP_CONT_PROB_TABLE; - // private BAQ baq; static { - LOG_ONE_THIRD= -Math.log10(3.0); LOG_ONE_HALF= -Math.log10(2.0); END_GAP_COST = LOG_ONE_HALF; @@ -125,10 +118,8 @@ public class PairHMMIndelErrorModel { } } - public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, String s1, String s2, boolean dovit) { + public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit) { this(indelGOP, indelGCP, deb, doCDP); - this.s1 = s1; - this.s2 = s2; this.doViterbi = dovit; } @@ -168,8 +159,6 @@ public class PairHMMIndelErrorModel { GAP_OPEN_PROB_TABLE[i] = gop; GAP_CONT_PROB_TABLE[i] = gcp; } -// baq = new BAQ(Math.pow(10.0,logGapOpenProbability),Math.pow(10.0,logGapContinuationProbability), - // 3,(byte)BASE_QUAL_THRESHOLD,true); } @@ -654,13 +643,14 @@ public class PairHMMIndelErrorModel { numStartSoftClippedBases = read.getAlignmentStart()- read.getUnclippedStart(); numEndSoftClippedBases = read.getUnclippedEnd()- read.getAlignmentEnd(); - if (eventLength > 0) { + /*if (eventLength > 0) */ + { if ((read.getAlignmentStart()>=eventStartPos-eventLength && read.getAlignmentStart() <= eventStartPos+1) || (read.getAlignmentEnd() >= eventStartPos && read.getAlignmentEnd() <= eventStartPos + eventLength)) { numStartSoftClippedBases = 0; numEndSoftClippedBases = 0; } - } + } byte[] unclippedReadBases, unclippedReadQuals; @@ -752,34 +742,8 @@ public class PairHMMIndelErrorModel { long indStart = start - haplotype.getStartPosition(); long indStop = stop - haplotype.getStartPosition(); - //long indStart = 0; - //long indStop = 400; byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBasesAsBytes(), (int)indStart, (int)indStop); - //byte[] haplotypeBases = haplotype.getBasesAsBytes(); - //BAQ.BAQCalculationResult baqRes = baq.calcBAQFromHMM(read, haplotypeBases, (int)(start - readStart)); - /* if (s1 != null && s2 != null) { - haplotypeBases = s2.getBytes(); - readBases = s1.getBytes(); - readQuals = null; - readQuals = new byte[readBases.length]; - for (int i=0; i < readQuals.length; i++) - readQuals[i] = (byte)30; - - byte[] iq = new byte[readBases.length]; - byte[] q = new byte[readBases.length]; - int[] state = new int[readBases.length]; - int a=baq.hmm_glocal(haplotypeBases, readBases, 0, readBases.length, iq, state, q); - System.out.println("BAQ State:"); - for (int i=0; i < state.length; i++) - System.out.format("%d ",state[i]); - System.out.println(); - System.out.println("BAQ Q:"); - for (int i=0; i < q.length; i++) - System.out.format("%d ",(int)q[i]); - System.out.println(); - - } */ if (DEBUG) { System.out.println("Haplotype to test:"); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 18103f690..ad49221c5 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -27,13 +27,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("9895541f0395ddb9977abb7d8274831e")); + Arrays.asList("a2218244df08dce96d04f8736a6de796")); executeTest("test MultiSample Pilot1", spec); } @Test public void testMultiSamplePilot2AndRecallingWithAlleles() { - String md5 = "2a8461e846a437ddcac9f1be185d0a4b"; + String md5 = "d50bc17991d0dfd03ab2810a1cfeec85"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,050,000", 1, @@ -75,7 +75,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "92be4e11ee4660b67b32ae466d0651b0"; + private final static String COMPRESSED_OUTPUT_MD5 = "211e4ed67cd7bd3501555d829548da48"; @Test public void testCompressedOutput() { @@ -105,7 +105,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "f56fdf5c6a2db85031a3cece37e12a56"; + String md5 = "f83a33a1ecc350cae0c002e4a43a7861"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -184,8 +184,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "9ed7893a36b27d5e63b6ee021918a4dd" ); - e.put( 1.0 / 1850, "533bd796d2345a00536bf3164c4f75e1" ); + e.put( 0.01, "4fb6e66895bbaa9a3b9af91c985a2782" ); + e.put( 1.0 / 1850, "1397d29d924e74f43de466927a9f7915" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -209,7 +209,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("1cdc0a1a40495cd17461280a1a37d429")); + Arrays.asList("b44ddfbeb9fc413e44194799b3e3bbf8")); executeTest(String.format("test multiple technologies"), spec); } @@ -228,7 +228,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("940b42d14378e41d45e5e25a9b5aaebc")); + Arrays.asList("164e25d61ef562ae863871d4ec7cb387")); executeTest(String.format("test calling with BAQ"), spec); } @@ -248,7 +248,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -glm INDEL" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("d4c15c60ffefe754d83e1164e78f2b6a")); + Arrays.asList("ffeea8550a707871a68f6707159f4ea9")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -glm INDEL -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("599220ba0cc5d8a32e4952fca85fd080")); + Arrays.asList("b6546f6e8f092b3c39cffec22aa47bcb")); executeTest(String.format("test indel caller in SLX witn low min allele count"), spec); } @@ -277,7 +277,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -glm INDEL" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("4c8c2ac4e024f70779465fb14c5d8c8a")); + Arrays.asList("897057070aa2e3651d91625a58c5ec4b")); executeTest(String.format("test indel calling, multiple technologies"), spec); }