diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 6ea735ec0..9df92ba7a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -334,10 +334,10 @@ public class GenotypingEngine { boolean isBiallelic = true; VariantContext thisVC = null; VariantContext nextVC = null; - int x11 = 0; - int x12 = 0; - int x21 = 0; - int x22 = 0; + double x11 = Double.NEGATIVE_INFINITY; + double x12 = Double.NEGATIVE_INFINITY; + double x21 = Double.NEGATIVE_INFINITY; + double x22 = Double.NEGATIVE_INFINITY; for( final Haplotype h : haplotypes ) { // only make complex substitutions out of consecutive biallelic sites @@ -360,13 +360,17 @@ public class GenotypingEngine { } } // count up the co-occurrences of the events for the R^2 calculation - // BUGBUG: use haplotype likelihoods per-sample to make this more accurate - if( thisHapVC == null ) { - if( nextHapVC == null ) { x11++; } - else { x12++; } - } else { - if( nextHapVC == null ) { x21++; } - else { x22++; } + final ArrayList haplotypeList = new ArrayList(); + haplotypeList.add(h); + for( final String sample : haplotypes.get(0).getSampleKeySet() ) { + final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0]; + if( thisHapVC == null ) { + if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); } + else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); } + } else { + if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); } + else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); } + } } } if( thisVC == null || nextVC == null ) { @@ -374,7 +378,7 @@ public class GenotypingEngine { //throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart); } if( isBiallelic ) { - final double R2 = calculateR2LD( x11, x12, x21, x22 ); + final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) ); if( DEBUG ) { System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2)); System.out.println("-- " + thisVC); @@ -445,12 +449,11 @@ public class GenotypingEngine { return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make(); } - @Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"}) - protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) { - final int total = x11 + x12 + x21 + x22; - final double pa1b1 = ((double) x11) / ((double) total); - final double pa1b2 = ((double) x12) / ((double) total); - final double pa2b1 = ((double) x21) / ((double) total); + protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) { + final double total = x11 + x12 + x21 + x22; + final double pa1b1 = x11 / total; + final double pa1b2 = x12 / total; + final double pa2b1 = x21 / total; final double pa1 = pa1b1 + pa1b2; final double pb1 = pa1b1 + pa2b1; return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) ); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 14ea17483..1130feaea 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -415,7 +415,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList ); // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes ); + final ArrayList bestHaplotypes = haplotypes;// ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes ); for( final Pair>> callResult : ( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES @@ -496,7 +496,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem //--------------------------------------------------------------------------------------------------------------- private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { - if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); } + if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } final ArrayList finalizedReadList = new ArrayList(); final FragmentCollection fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) ); activeRegion.clearReads(); @@ -525,7 +525,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem private List filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { final ArrayList readsToRemove = new ArrayList(); for( final GATKSAMRecord rec : activeRegion.getReads() ) { - if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { + if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { readsToRemove.add(rec); } }