My changes
This commit is contained in:
parent
143e92b797
commit
02ff930f6a
|
|
@ -43,14 +43,13 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
|
|||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
public class PairHMMIndelErrorModel {
|
||||
public static final int BASE_QUAL_THRESHOLD = 20;
|
||||
|
||||
private boolean DEBUG = false;
|
||||
private boolean bandedLikelihoods = true;
|
||||
private boolean bandedLikelihoods = false;
|
||||
|
||||
private static final int MAX_CACHED_QUAL = 127;
|
||||
|
||||
|
|
@ -157,7 +156,7 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
|
||||
|
||||
private void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
|
||||
private static void updateCell(final int indI, final int indJ, final int X_METRIC_LENGTH, final int Y_METRIC_LENGTH, byte[] readBases, byte[] readQuals, byte[] haplotypeBases,
|
||||
byte[] currentGOP, byte[] currentGCP, double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
|
||||
if (indI > 0 && indJ > 0) {
|
||||
final int im1 = indI -1;
|
||||
|
|
@ -183,9 +182,27 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
}
|
||||
|
||||
private double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
|
||||
public static double computeReadLikehoodGivenHaplotype(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
|
||||
byte[] currentGOP, byte[] currentGCP, boolean bandedLikelihoods) {
|
||||
// M, X, and Y arrays are of size read and haplotype + 1 because of an extra column for initial conditions
|
||||
final int X_METRIC_LENGTH = readBases.length + 1;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length + 1;
|
||||
|
||||
// initial arrays to hold the probabilities of being in the match, insertion and deletion cases
|
||||
final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||
|
||||
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
|
||||
|
||||
return computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentGOP,
|
||||
currentGCP, 0, matchMetricArray, XMetricArray, YMetricArray, bandedLikelihoods);
|
||||
|
||||
}
|
||||
private static double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals,
|
||||
byte[] currentGOP, byte[] currentGCP, int indToStart,
|
||||
double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray) {
|
||||
double[][] matchMetricArray, double[][] XMetricArray, double[][] YMetricArray,
|
||||
boolean bandedLikelihoods) {
|
||||
|
||||
final int X_METRIC_LENGTH = readBases.length+1;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
|
||||
|
|
@ -391,6 +408,9 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
}
|
||||
else {
|
||||
if (DEBUG) {
|
||||
System.out.format("Read Name:%s, aln start:%d aln stop:%d orig cigar:%s\n",p.getRead().getReadName(), p.getRead().getAlignmentStart(), p.getRead().getAlignmentEnd(), p.getRead().getCigarString());
|
||||
}
|
||||
// System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
|
||||
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
|
||||
|
||||
|
|
@ -577,8 +597,8 @@ public class PairHMMIndelErrorModel {
|
|||
final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
|
||||
(int)indStart, (int)indStop);
|
||||
|
||||
final int X_METRIC_LENGTH = readBases.length+1;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
|
||||
final int X_METRIC_LENGTH = readBases.length+2;
|
||||
final int Y_METRIC_LENGTH = haplotypeBases.length+2;
|
||||
|
||||
if (matchMetricArray == null) {
|
||||
//no need to reallocate arrays for each new haplotype, as length won't change
|
||||
|
|
@ -588,7 +608,7 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
}
|
||||
|
||||
pairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
|
||||
PairHMM.initializeArrays(matchMetricArray, XMetricArray, YMetricArray, X_METRIC_LENGTH);
|
||||
|
||||
/*
|
||||
if (previousHaplotypeSeen == null)
|
||||
|
|
@ -602,17 +622,14 @@ public class PairHMMIndelErrorModel {
|
|||
contextLogGapOpenProbabilities, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities,
|
||||
startIndexInHaplotype, matchMetricArray, XMetricArray, YMetricArray);
|
||||
|
||||
/* double r2 = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, contextLogGapOpenProbabilities,
|
||||
contextLogGapContinuationProbabilities, 0, matchMetricArray, XMetricArray, YMetricArray);
|
||||
|
||||
if (readLikelihood > 0) {
|
||||
int k=0;
|
||||
}
|
||||
*/ if (DEBUG) {
|
||||
/* double l2 = computeReadLikehoodGivenHaplotype(haplotypeBases, readBases, readQuals, contextLogGapOpenProbabilities,
|
||||
contextLogGapContinuationProbabilities, bandedLikelihoods);
|
||||
*/
|
||||
if (DEBUG) {
|
||||
System.out.println("H:"+new String(haplotypeBases));
|
||||
System.out.println("R:"+new String(readBases));
|
||||
System.out.format("L:%4.2f\n",readLikelihood);
|
||||
// System.out.format("Lorig:%4.2f\n",r2);
|
||||
// System.out.format("Lorig:%4.2f\n",r2);
|
||||
System.out.format("StPos:%d\n", startIndexInHaplotype);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,18 +41,18 @@ public class PairHMM {
|
|||
private static final byte DEFAULT_GCP = (byte) 10;
|
||||
private static final double BANDING_TOLERANCE = 22.0;
|
||||
private static final int BANDING_CLUSTER_WINDOW = 12;
|
||||
private final boolean doBanded;
|
||||
private final boolean noBanded;
|
||||
|
||||
public PairHMM() {
|
||||
doBanded = false;
|
||||
noBanded = false;
|
||||
}
|
||||
|
||||
public PairHMM( final boolean doBanded ) {
|
||||
this.doBanded = doBanded;
|
||||
public PairHMM( final boolean noBanded ) {
|
||||
this.noBanded = noBanded;
|
||||
}
|
||||
|
||||
|
||||
public void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray,
|
||||
public static void initializeArrays(final double[][] matchMetricArray, final double[][] XMetricArray, final double[][] YMetricArray,
|
||||
final int X_METRIC_LENGTH) {
|
||||
|
||||
for( int iii=0; iii < X_METRIC_LENGTH; iii++ ) {
|
||||
|
|
@ -100,7 +100,7 @@ public class PairHMM {
|
|||
readQuals[iii] = ( readQuals[iii] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : (readQuals[iii] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[iii]) );
|
||||
}
|
||||
|
||||
if( doBanded ) {
|
||||
if( false ) {
|
||||
final ArrayList<Integer> workQueue = new ArrayList<Integer>(); // holds a queue of starting work location (indices along the diagonal). Will be sorted each step
|
||||
final ArrayList<Integer> workToBeAdded = new ArrayList<Integer>();
|
||||
final ArrayList<Double> calculatedValues = new ArrayList<Double>();
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("d3191b2f10139c969501990ffdf29082"));
|
||||
Arrays.asList("9b08dc6800ba11bc6d9f6ccf392a60fe"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("7c7288170c6aadae555a44e79ca5bf19"));
|
||||
Arrays.asList("d275e0f75368dbff012ea8655dce3444"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl -NO_HEADER -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + validationDataLocation + "multiallelic.snps.bam -o %s -L " + validationDataLocation + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("c956f0ea0e5f002288a09f4bc4af1319"));
|
||||
Arrays.asList("e948543b83bfd0640fcb994d72f8e234"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "2158eb918abb95225ea5372fcd9c9236";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "1e3c897794e5763a8720807686707b18";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -101,7 +101,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "834e85f6af4ad4a143b913dfc7defb08";
|
||||
String md5 = "06d11ed89f02f08911e100df0f7db7a4";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -200,8 +200,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHeterozyosity() {
|
||||
HashMap<Double, String> e = new HashMap<Double, String>();
|
||||
e.put( 0.01, "d5879f1c277035060434d79a441b31ca" );
|
||||
e.put( 1.0 / 1850, "13f80245bab2321b92d27eebd5c2fc33" );
|
||||
e.put( 0.01, "d07e5ca757fbcb1c03f652f82265c2f8" );
|
||||
e.put( 1.0 / 1850, "d1fb9186e6f39f2bcf5d0edacd8f7fe2" );
|
||||
|
||||
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -225,7 +225,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("8c134a6e0abcc70d2ed3216d5f8e0100"));
|
||||
Arrays.asList("623be1fd8b63a01bfe35ac864d5199fe"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -244,7 +244,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("34baad3177712f6cd0b476f4c578e08f"));
|
||||
Arrays.asList("40ea10c0238c3be2991d31ae72476884"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -263,7 +263,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("4bf4f819a39a73707cae60fe30478742"));
|
||||
Arrays.asList("c9b0bd900a4ec949adfbd28909581eeb"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -278,7 +278,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("ae08fbd6b0618cf3ac1be763ed7b41ca"));
|
||||
Arrays.asList("6b7c8691c527facf9884c2517d943f2f"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("120600f2bfa3a47bd93b50f768f98d5b"));
|
||||
Arrays.asList("d72603aa33a086d64d4dddfd2995552f"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -301,7 +301,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("2e75d2766235eab23091a67ea2947d13"));
|
||||
Arrays.asList("4a59fe207949b7d043481d7c1b786573"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -311,7 +311,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("5057bd7d07111e8b1085064782eb6c80"));
|
||||
Arrays.asList("a8a9ccf30bddee94bb1d300600794ee7"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -319,13 +319,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("c0f9ca3ceab90ebd38cc0eec9441d71f"));
|
||||
Arrays.asList("0b388936022539530f565da14d5496d3"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("0240f34e71f137518be233c9890a5349"));
|
||||
Arrays.asList("537dd9b4174dc356fb13d8d3098ad602"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -368,7 +368,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("53758e66e3a3188bd9c78d2329d41962"));
|
||||
Arrays.asList("973178b97efd2daacc9e45c414275d59"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -376,7 +376,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("3aa39b1f6f3b1eb051765f9c21f6f461"));
|
||||
Arrays.asList("220facd2eb0923515d1d8ab874055564"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue