From 8ca3390ee06c33febbf2e641fb6bf6f73f32e5a3 Mon Sep 17 00:00:00 2001 From: delangel Date: Sat, 12 Mar 2011 00:00:55 +0000 Subject: [PATCH] Low level plumbing work required to have a context dependent error model with the new indel probabilistic alignment model. This just adds an extra input argument and does some refactoring so that when an actual model is ready it will be easy to plug in. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5427 348d0f76-0448-11de-a6fe-93d51630548a --- ...elGenotypeLikelihoodsCalculationModel.java | 38 +++-- .../genotyper/UnifiedArgumentCollection.java | 5 +- .../indels/PairHMMIndelErrorModel.java | 148 ++++++------------ 3 files changed, 79 insertions(+), 112 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java index 1d1fb3e6d..6cf1a1c1a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java @@ -31,6 +31,7 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.baq.BAQ; @@ -71,7 +72,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo super(UAC, logger); if (UAC.newlike) { pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY, - UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.S1, UAC.S2, UAC.dovit); + UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.S1, UAC.S2, UAC.dovit); newLike = true; } else @@ -88,7 +89,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo private ArrayList computeConsensusAlleles(ReferenceContext ref, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { - Allele refAllele, altAllele; + Allele refAllele=null, altAllele=null; GenomeLoc loc = ref.getLocus(); ArrayList aList = new ArrayList(); @@ -235,19 +236,33 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo // get ref bases of accurate deletion int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart()); + //System.out.println(new String(ref.getBases())); byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen); - refAllele = Allele.create(refBases,true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + boolean ok = true; + for (int i=0; i < refBases.length; i++) + if (!BaseUtils.isRegularBase(refBases[i])) + ok = false; + + if (ok) { + refAllele = Allele.create(refBases,true); + altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + } } else { // insertion case - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(bestAltAllele, false); + boolean ok = true; + for (int i=0; i < bestAltAllele.length(); i++) + if (!BaseUtils.isRegularBase(bestAltAllele.getBytes()[i])) + ok = false; + if (ok) { + refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); + altAllele = Allele.create(bestAltAllele, false); + } + } + if (refAllele != null && altAllele != null) { + aList.add(0,refAllele); + aList.add(1,altAllele); } - - aList.add(0,refAllele); - aList.add(1,altAllele); - return aList; } @@ -310,6 +325,9 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo if ( !(priors instanceof DiploidIndelGenotypePriors) ) throw new StingException("Only diploid-based Indel priors are supported in the DINDEL GL model"); + if (alleleList.isEmpty()) + return null; + refAllele = alleleList.get(0); altAllele = alleleList.get(1); int eventLength = refAllele.getBaseString().length() - altAllele.getBaseString().length(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 488c24fc0..25f912742 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -114,6 +114,8 @@ public class UnifiedArgumentCollection { @Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false) public int INDEL_HAPLOTYPE_SIZE = 80; + @Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false) + public boolean DO_CONTEXT_DEPENDENT_PENALTIES = false; //gdebug+ @Hidden @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false) @@ -167,7 +169,8 @@ public class UnifiedArgumentCollection { uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY; uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO; uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE; - + uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES; + // todo- arguments to remove uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT; uac.INSERTION_START_PROBABILITY = INSERTION_START_PROBABILITY; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 6db196150..a7bd7aed0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -95,6 +96,7 @@ public class PairHMMIndelErrorModel { private boolean doViterbi = false; private final boolean useSoftClippedBases = false; private final boolean useAffineGapModel = true; + private boolean doContextDependentPenalties = false; private String s1; private String s2; @@ -117,19 +119,19 @@ public class PairHMMIndelErrorModel { } } - public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, String s1, String s2, boolean dovit) { - this(indelGOP, indelGCP, deb); + public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, String s1, String s2, boolean dovit) { + this(indelGOP, indelGCP, deb, doCDP); this.s1 = s1; this.s2 = s2; this.doViterbi = dovit; } - public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb) { + public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) { this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob - + this.doContextDependentPenalties = doCDP; this.DEBUG = deb; @@ -264,6 +266,36 @@ public class PairHMMIndelErrorModel { } + private Pair getGapPenalties(final int indI, final int indJ, final int X_METRIC_LENGTH, + final int Y_METRIC_LENGTH, final int tableToUpdate) { + + double c=0.0,d=0.0; + + if (doContextDependentPenalties) { + // todo- fill me!! + } else { + switch (tableToUpdate) { + case MATCH_OFFSET: + + break; + case X_OFFSET: + c = (indJ==Y_METRIC_LENGTH-1? 0: logGapOpenProbability); + d = (indJ==Y_METRIC_LENGTH-1? 0: logGapContinuationProbability); + + break; + + case Y_OFFSET: + c = (indI==X_METRIC_LENGTH-1? 0: logGapOpenProbability); + d = (indI==X_METRIC_LENGTH-1? 0: logGapContinuationProbability); + + break; + default: + throw new StingException("BUG!! Invalid table offset"); + } + } + return new Pair(Double.valueOf(c),Double.valueOf(d)); + } + public double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals) { final int X_METRIC_LENGTH = readBases.length+1; @@ -318,7 +350,6 @@ public class PairHMMIndelErrorModel { double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual]; - double c,d; double[] metrics = new double[3]; // update match array @@ -339,10 +370,11 @@ public class PairHMMIndelErrorModel { // update X array // State X(i,j): X(1:i) aligned to a gap in Y(1:j). // When in last column of X, ie X(1:i) aligned to full Y, we don't want to penalize gaps - c = (indJ==Y_METRIC_LENGTH-1? 0: logGapOpenProbability); - d = (indJ==Y_METRIC_LENGTH-1? 0: logGapContinuationProbability); - metrics[MATCH_OFFSET] = matchMetricArray[indI-1][indJ] + c; - metrics[X_OFFSET] = XMetricArray[indI-1][indJ] + d; + Pair p = getGapPenalties(indI, indJ, X_METRIC_LENGTH, + Y_METRIC_LENGTH, X_OFFSET); + + metrics[MATCH_OFFSET] = matchMetricArray[indI-1][indJ] + p.first; + metrics[X_OFFSET] = XMetricArray[indI-1][indJ] + p.second; metrics[Y_OFFSET] = Double.NEGATIVE_INFINITY; //YMetricArray[indI-1][indJ] + logGapOpenProbability; if (doViterbi) { @@ -356,12 +388,12 @@ public class PairHMMIndelErrorModel { bestActionArrayX[indI][indJ] = ACTIONS_X[bestMetricIdx]; // update Y array - c = (indI==X_METRIC_LENGTH-1? 0: logGapOpenProbability); - d = (indI==X_METRIC_LENGTH-1? 0: logGapContinuationProbability); + p = getGapPenalties(indI, indJ, X_METRIC_LENGTH, + Y_METRIC_LENGTH, Y_OFFSET); - metrics[MATCH_OFFSET] = matchMetricArray[indI][indJ-1] + c; + metrics[MATCH_OFFSET] = matchMetricArray[indI][indJ-1] + p.first; metrics[X_OFFSET] = Double.NEGATIVE_INFINITY; //XMetricArray[indI][indJ-1] + logGapOpenProbability; - metrics[Y_OFFSET] = YMetricArray[indI][indJ-1] + d; + metrics[Y_OFFSET] = YMetricArray[indI][indJ-1] + p.second; if (doViterbi) { bestMetricIdx = MathUtils.maxElementIndex(metrics); @@ -437,11 +469,7 @@ public class PairHMMIndelErrorModel { i--; j--; } - /* if (j<0 || i < 0) - { - int k=1; - }*/ - } + } @@ -687,87 +715,5 @@ public class PairHMMIndelErrorModel { } - /* Retrieves bases and qualities from a read that align to a start and stop position within a given contig */ - public static Pair getBasesBetweenPositions(SAMRecord read, long startPos, long endPos ) { - - final Cigar cigar = read.getCigar(); - final byte[] bases = read.getReadBases(); - final byte[] quals = read.getBaseQualities(); - // final byte[] alignment = new byte[alignmentLength]; - long alignStartPos = startPos; - long readStartPos = read.getUnclippedStart(); - - if (alignStartPos < readStartPos ) - alignStartPos = readStartPos; - - long alignEndPos = endPos; - long readEndPos = read.getUnclippedEnd(); - - if (alignEndPos > readEndPos ) - alignEndPos = readEndPos; - - // We need to now locate read bases between alignStartPos and alignEndPos - - //first see how many bytes will we require for output - final byte[] readAlignment = new byte[bases.length]; - final byte[] qualAlignment = new byte[bases.length]; - - int outPos = 0; - int readPos = 0; - int alignPos = 0; - for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) { - - final CigarElement ce = cigar.getCigarElement(iii); - final int elementLength = ce.getLength(); - - switch( ce.getOperator() ) { - case I: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - // check if insertion falls before window we're looking for - if (readStartPos +readPos >= alignStartPos) { - readAlignment[outPos] = bases[readPos]; - qualAlignment[outPos] = quals[readPos]; - outPos++; - } - readPos++; - } - break; - case D: - case N: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - alignPos++; - if (alignStartPos + alignPos > alignEndPos) - break; - } - break; - case S: - case M: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - if (readStartPos +readPos >= alignStartPos) { - readAlignment[outPos] = bases[readPos]; - qualAlignment[outPos] = quals[readPos]; - outPos++; - alignPos++; - } - - readPos++; - - if (alignStartPos + alignPos > alignEndPos) - break; - } - break; - case H: - case P: - break; - default: - throw new ReviewedStingException( "Unsupported cigar operator: " + ce.getOperator() ); - } - // if we're already beyong of edge of window of interest, nothing more to do - if (alignStartPos + alignPos > alignEndPos) - break; - - } - return new Pair(Arrays.copyOf(readAlignment,outPos), Arrays.copyOf(qualAlignment,outPos)); - } - + }