From 02ff930f6a854b0704a882860b243216e7d7cf92 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 19 Apr 2012 12:45:18 -0400 Subject: [PATCH] My changes --- .../indels/PairHMMIndelErrorModel.java | 49 +++++++++++++------ .../broadinstitute/sting/utils/PairHMM.java | 12 ++--- .../UnifiedGenotyperIntegrationTest.java | 36 +++++++------- 3 files changed, 57 insertions(+), 40 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 171c42040..5ac8b981e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -43,14 +43,13 @@ import org.broadinstitute.sting.utils.variantcontext.Allele; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedHashMap; -import java.util.Map; public class PairHMMIndelErrorModel { public static final int BASE_QUAL_THRESHOLD = 20; private boolean DEBUG = false; - private boolean bandedLikelihoods = true; + private boolean bandedLikelihoods = false; private static final int MAX_CACHED_QUAL = 127; @@ -157,7 +156,7 @@ public class PairHMMIndelErrorModel { } - private void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases, + private static void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases, byte[] currentGOP, byte[] currentGCP, double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) { if (indI > 0 && indJ > 0) { final int im1 = indI -1; @@ -183,9 +182,27 @@ public class PairHMMIndelErrorModel { } } - private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals, + public static double computeReadLikehoodGivenHaplotype(byte[] haplotypeBases, byte[] readBases, byte[] readQuals, + byte[] currentGOP, byte[] currentGCP, boolean bandedLikelihoods) { + // M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions + final int X_METRIC_LENGTH = readBases.length + 1; + final int Y_METRIC_LENGTH = haplotypeBases.length + 1; + + // initial arrays to hold the probabilities of being in the match, insertion and deletion cases + final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH]; + + PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH); + + return computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentGOP, + currentGCP, 0, matchMetricArray, XMetricArray, YMetricArray, bandedLikelihoods); + + } + private static double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals, byte[] currentGOP, byte[] currentGCP, int indToStart, - double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) { + double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray, + boolean bandedLikelihoods) { final int X_METRIC_LENGTH = readBases.length+1; final int Y_METRIC_LENGTH = haplotypeBases.length+1; @@ -391,6 +408,9 @@ public class PairHMMIndelErrorModel { } } else { + if (DEBUG) { + System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString()); + } // System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName()); GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead()); @@ -577,8 +597,8 @@ public class PairHMMIndelErrorModel { final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(), (int)indStart, (int)indStop); - final int X_METRIC_LENGTH = readBases.length+1; - final int Y_METRIC_LENGTH = haplotypeBases.length+1; + final int X_METRIC_LENGTH = readBases.length+2; + final int Y_METRIC_LENGTH = haplotypeBases.length+2; if (matchMetricArray == null) { //no need to reallocate arrays for each new haplotype, as length won't change @@ -588,7 +608,7 @@ public class PairHMMIndelErrorModel { } - pairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH); + PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH); /* if (previousHaplotypeSeen == null) @@ -602,17 +622,14 @@ public class PairHMMIndelErrorModel { contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities, startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray); - /* double r2 = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, contextLogGapOpenProbabilities, - contextLogGapContinuationProbabilities, 0, matchMetricArray, XMetricArray, YMetricArray); - - if (readLikelihood > 0) { - int k=0; - } - */ if (DEBUG) { +/* double l2 = computeReadLikehoodGivenHaplotype(haplotypeBases, readBases, readQuals, contextLogGapOpenProbabilities, + contextLogGapContinuationProbabilities, bandedLikelihoods); + */ + if (DEBUG) { System.out.println("H:"+new String(haplotypeBases)); System.out.println("R:"+new String(readBases)); System.out.format("L:%4.2f\n",readLikelihood); - // System.out.format("Lorig:%4.2f\n",r2); + // System.out.format("Lorig:%4.2f\n",r2); System.out.format("StPos:%d\n", startIndexInHaplotype); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java index 58bed2795..9fcb97a4d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/PairHMM.java @@ -41,18 +41,18 @@ public class PairHMM { private static final byte DEFAULT_GCP = (byte) 10; private static final double BANDING_TOLERANCE = 22.0; private static final int BANDING_CLUSTER_WINDOW = 12; - private final boolean doBanded; + private final boolean noBanded; public PairHMM() { - doBanded = false; + noBanded = false; } - public PairHMM( final boolean doBanded ) { - this.doBanded = doBanded; + public PairHMM( final boolean noBanded ) { + this.noBanded = noBanded; } - public void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray, + public static void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray, final int X_METRIC_LENGTH) { for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) { @@ -100,7 +100,7 @@ public class PairHMM { readQuals[iii] = ( readQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[iii] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[iii]) ); } - if( doBanded ) { + if( false ) { final ArrayList workQueue = new ArrayList(); // holds a queue of starting work location (indices along the diagonal). Will be sorted each step final ArrayList workToBeAdded = new ArrayList(); final ArrayList calculatedValues = new ArrayList(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 4d00f6113..067e9088c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("d3191b2f10139c969501990ffdf29082")); + Arrays.asList("9b08dc6800ba11bc6d9f6ccf392a60fe")); executeTest("test MultiSample Pilot1", spec); } @@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("7c7288170c6aadae555a44e79ca5bf19")); + Arrays.asList("d275e0f75368dbff012ea8655dce3444")); executeTest("test SingleSample Pilot2", spec); } @@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1, - Arrays.asList("c956f0ea0e5f002288a09f4bc4af1319")); + Arrays.asList("e948543b83bfd0640fcb994d72f8e234")); executeTest("test Multiple SNP alleles", spec); } @@ -80,7 +80,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "2158eb918abb95225ea5372fcd9c9236"; + private final static String COMPRESSED_OUTPUT_MD5 = "1e3c897794e5763a8720807686707b18"; @Test public void testCompressedOutput() { @@ -101,7 +101,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "834e85f6af4ad4a143b913dfc7defb08"; + String md5 = "06d11ed89f02f08911e100df0f7db7a4"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -200,8 +200,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "d5879f1c277035060434d79a441b31ca" ); - e.put( 1.0 / 1850, "13f80245bab2321b92d27eebd5c2fc33" ); + e.put( 0.01, "d07e5ca757fbcb1c03f652f82265c2f8" ); + e.put( 1.0 / 1850, "d1fb9186e6f39f2bcf5d0edacd8f7fe2" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -225,7 +225,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("8c134a6e0abcc70d2ed3216d5f8e0100")); + Arrays.asList("623be1fd8b63a01bfe35ac864d5199fe")); executeTest(String.format("test multiple technologies"), spec); } @@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("34baad3177712f6cd0b476f4c578e08f")); + Arrays.asList("40ea10c0238c3be2991d31ae72476884")); executeTest(String.format("test calling with BAQ"), spec); } @@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("4bf4f819a39a73707cae60fe30478742")); + Arrays.asList("c9b0bd900a4ec949adfbd28909581eeb")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("ae08fbd6b0618cf3ac1be763ed7b41ca")); + Arrays.asList("6b7c8691c527facf9884c2517d943f2f")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("120600f2bfa3a47bd93b50f768f98d5b")); + Arrays.asList("d72603aa33a086d64d4dddfd2995552f")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -301,7 +301,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("2e75d2766235eab23091a67ea2947d13")); + Arrays.asList("4a59fe207949b7d043481d7c1b786573")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -311,7 +311,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("5057bd7d07111e8b1085064782eb6c80")); + Arrays.asList("a8a9ccf30bddee94bb1d300600794ee7")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -319,13 +319,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("c0f9ca3ceab90ebd38cc0eec9441d71f")); + Arrays.asList("0b388936022539530f565da14d5496d3")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("0240f34e71f137518be233c9890a5349")); + Arrays.asList("537dd9b4174dc356fb13d8d3098ad602")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -368,7 +368,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("53758e66e3a3188bd9c78d2329d41962")); + Arrays.asList("973178b97efd2daacc9e45c414275d59")); executeTest("test minIndelFraction 0.0", spec); } @@ -376,7 +376,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("3aa39b1f6f3b1eb051765f9c21f6f461")); + Arrays.asList("220facd2eb0923515d1d8ab874055564")); executeTest("test minIndelFraction 0.25", spec); }