diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java index 1d1fb3e6d..6cf1a1c1a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DindelGenotypeLikelihoodsCalculationModel.java @@ -31,6 +31,7 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel; import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.baq.BAQ; @@ -71,7 +72,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo super(UAC, logger); if (UAC.newlike) { pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY, - UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.S1, UAC.S2, UAC.dovit); + UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.S1, UAC.S2, UAC.dovit); newLike = true; } else @@ -88,7 +89,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo private ArrayList computeConsensusAlleles(ReferenceContext ref, Map contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { - Allele refAllele, altAllele; + Allele refAllele=null, altAllele=null; GenomeLoc loc = ref.getLocus(); ArrayList aList = new ArrayList(); @@ -235,19 +236,33 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo // get ref bases of accurate deletion int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart()); + //System.out.println(new String(ref.getBases())); byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen); - refAllele = Allele.create(refBases,true); - altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + boolean ok = true; + for (int i=0; i < refBases.length; i++) + if (!BaseUtils.isRegularBase(refBases[i])) + ok = false; + + if (ok) { + refAllele = Allele.create(refBases,true); + altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); + } } else { // insertion case - refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); - altAllele = Allele.create(bestAltAllele, false); + boolean ok = true; + for (int i=0; i < bestAltAllele.length(); i++) + if (!BaseUtils.isRegularBase(bestAltAllele.getBytes()[i])) + ok = false; + if (ok) { + refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); + altAllele = Allele.create(bestAltAllele, false); + } + } + if (refAllele != null && altAllele != null) { + aList.add(0,refAllele); + aList.add(1,altAllele); } - - aList.add(0,refAllele); - aList.add(1,altAllele); - return aList; } @@ -310,6 +325,9 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo if ( !(priors instanceof DiploidIndelGenotypePriors) ) throw new StingException("Only diploid-based Indel priors are supported in the DINDEL GL model"); + if (alleleList.isEmpty()) + return null; + refAllele = alleleList.get(0); altAllele = alleleList.get(1); int eventLength = refAllele.getBaseString().length() - altAllele.getBaseString().length(); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 488c24fc0..25f912742 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -114,6 +114,8 @@ public class UnifiedArgumentCollection { @Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false) public int INDEL_HAPLOTYPE_SIZE = 80; + @Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false) + public boolean DO_CONTEXT_DEPENDENT_PENALTIES = false; //gdebug+ @Hidden @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false) @@ -167,7 +169,8 @@ public class UnifiedArgumentCollection { uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY; uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO; uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE; - + uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES; + // todo- arguments to remove uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT; uac.INSERTION_START_PROBABILITY = INSERTION_START_PROBABILITY; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java index 6db196150..a7bd7aed0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.genotype.Haplotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -95,6 +96,7 @@ public class PairHMMIndelErrorModel { private boolean doViterbi = false; private final boolean useSoftClippedBases = false; private final boolean useAffineGapModel = true; + private boolean doContextDependentPenalties = false; private String s1; private String s2; @@ -117,19 +119,19 @@ public class PairHMMIndelErrorModel { } } - public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, String s1, String s2, boolean dovit) { - this(indelGOP, indelGCP, deb); + public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, String s1, String s2, boolean dovit) { + this(indelGOP, indelGCP, deb, doCDP); this.s1 = s1; this.s2 = s2; this.doViterbi = dovit; } - public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb) { + public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) { this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob - + this.doContextDependentPenalties = doCDP; this.DEBUG = deb; @@ -264,6 +266,36 @@ public class PairHMMIndelErrorModel { } + private Pair getGapPenalties(final int indI, final int indJ, final int X_METRIC_LENGTH, + final int Y_METRIC_LENGTH, final int tableToUpdate) { + + double c=0.0,d=0.0; + + if (doContextDependentPenalties) { + // todo- fill me!! + } else { + switch (tableToUpdate) { + case MATCH_OFFSET: + + break; + case X_OFFSET: + c = (indJ==Y_METRIC_LENGTH-1? 0: logGapOpenProbability); + d = (indJ==Y_METRIC_LENGTH-1? 0: logGapContinuationProbability); + + break; + + case Y_OFFSET: + c = (indI==X_METRIC_LENGTH-1? 0: logGapOpenProbability); + d = (indI==X_METRIC_LENGTH-1? 0: logGapContinuationProbability); + + break; + default: + throw new StingException("BUG!! Invalid table offset"); + } + } + return new Pair(Double.valueOf(c),Double.valueOf(d)); + } + public double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals) { final int X_METRIC_LENGTH = readBases.length+1; @@ -318,7 +350,6 @@ public class PairHMMIndelErrorModel { double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual]; - double c,d; double[] metrics = new double[3]; // update match array @@ -339,10 +370,11 @@ public class PairHMMIndelErrorModel { // update X array // State X(i,j): X(1:i) aligned to a gap in Y(1:j). // When in last column of X, ie X(1:i) aligned to full Y, we don't want to penalize gaps - c = (indJ==Y_METRIC_LENGTH-1? 0: logGapOpenProbability); - d = (indJ==Y_METRIC_LENGTH-1? 0: logGapContinuationProbability); - metrics[MATCH_OFFSET] = matchMetricArray[indI-1][indJ] + c; - metrics[X_OFFSET] = XMetricArray[indI-1][indJ] + d; + Pair p = getGapPenalties(indI, indJ, X_METRIC_LENGTH, + Y_METRIC_LENGTH, X_OFFSET); + + metrics[MATCH_OFFSET] = matchMetricArray[indI-1][indJ] + p.first; + metrics[X_OFFSET] = XMetricArray[indI-1][indJ] + p.second; metrics[Y_OFFSET] = Double.NEGATIVE_INFINITY; //YMetricArray[indI-1][indJ] + logGapOpenProbability; if (doViterbi) { @@ -356,12 +388,12 @@ public class PairHMMIndelErrorModel { bestActionArrayX[indI][indJ] = ACTIONS_X[bestMetricIdx]; // update Y array - c = (indI==X_METRIC_LENGTH-1? 0: logGapOpenProbability); - d = (indI==X_METRIC_LENGTH-1? 0: logGapContinuationProbability); + p = getGapPenalties(indI, indJ, X_METRIC_LENGTH, + Y_METRIC_LENGTH, Y_OFFSET); - metrics[MATCH_OFFSET] = matchMetricArray[indI][indJ-1] + c; + metrics[MATCH_OFFSET] = matchMetricArray[indI][indJ-1] + p.first; metrics[X_OFFSET] = Double.NEGATIVE_INFINITY; //XMetricArray[indI][indJ-1] + logGapOpenProbability; - metrics[Y_OFFSET] = YMetricArray[indI][indJ-1] + d; + metrics[Y_OFFSET] = YMetricArray[indI][indJ-1] + p.second; if (doViterbi) { bestMetricIdx = MathUtils.maxElementIndex(metrics); @@ -437,11 +469,7 @@ public class PairHMMIndelErrorModel { i--; j--; } - /* if (j<0 || i < 0) - { - int k=1; - }*/ - } + } @@ -687,87 +715,5 @@ public class PairHMMIndelErrorModel { } - /* Retrieves bases and qualities from a read that align to a start and stop position within a given contig */ - public static Pair getBasesBetweenPositions(SAMRecord read, long startPos, long endPos ) { - - final Cigar cigar = read.getCigar(); - final byte[] bases = read.getReadBases(); - final byte[] quals = read.getBaseQualities(); - // final byte[] alignment = new byte[alignmentLength]; - long alignStartPos = startPos; - long readStartPos = read.getUnclippedStart(); - - if (alignStartPos < readStartPos ) - alignStartPos = readStartPos; - - long alignEndPos = endPos; - long readEndPos = read.getUnclippedEnd(); - - if (alignEndPos > readEndPos ) - alignEndPos = readEndPos; - - // We need to now locate read bases between alignStartPos and alignEndPos - - //first see how many bytes will we require for output - final byte[] readAlignment = new byte[bases.length]; - final byte[] qualAlignment = new byte[bases.length]; - - int outPos = 0; - int readPos = 0; - int alignPos = 0; - for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) { - - final CigarElement ce = cigar.getCigarElement(iii); - final int elementLength = ce.getLength(); - - switch( ce.getOperator() ) { - case I: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - // check if insertion falls before window we're looking for - if (readStartPos +readPos >= alignStartPos) { - readAlignment[outPos] = bases[readPos]; - qualAlignment[outPos] = quals[readPos]; - outPos++; - } - readPos++; - } - break; - case D: - case N: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - alignPos++; - if (alignStartPos + alignPos > alignEndPos) - break; - } - break; - case S: - case M: - for ( int jjj = 0; jjj < elementLength; jjj++ ) { - if (readStartPos +readPos >= alignStartPos) { - readAlignment[outPos] = bases[readPos]; - qualAlignment[outPos] = quals[readPos]; - outPos++; - alignPos++; - } - - readPos++; - - if (alignStartPos + alignPos > alignEndPos) - break; - } - break; - case H: - case P: - break; - default: - throw new ReviewedStingException( "Unsupported cigar operator: " + ce.getOperator() ); - } - // if we're already beyong of edge of window of interest, nothing more to do - if (alignStartPos + alignPos > alignEndPos) - break; - - } - return new Pair(Arrays.copyOf(readAlignment,outPos), Arrays.copyOf(qualAlignment,outPos)); - } - + }