Low-level plumbing work required to have a context-dependent error model with the new indel probabilistic alignment model. This just adds an extra input argument and does some refactoring so that, when an actual model is ready, it will be easy to plug in.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5427 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e35a67b3cc
commit
8ca3390ee0
|
|
@ -31,6 +31,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
|
|
@ -71,7 +72,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
|
|||
super(UAC, logger);
|
||||
if (UAC.newlike) {
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.S1, UAC.S2, UAC.dovit);
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.S1, UAC.S2, UAC.dovit);
|
||||
newLike = true;
|
||||
}
|
||||
else
|
||||
|
|
@ -88,7 +89,7 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
|
|||
private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
|
||||
Map<String, StratifiedAlignmentContext> contexts,
|
||||
StratifiedAlignmentContext.StratifiedContextType contextType) {
|
||||
Allele refAllele, altAllele;
|
||||
Allele refAllele=null, altAllele=null;
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
ArrayList<Allele> aList = new ArrayList<Allele>();
|
||||
|
||||
|
|
@ -235,19 +236,33 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
|
|||
// get ref bases of accurate deletion
|
||||
int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart());
|
||||
|
||||
//System.out.println(new String(ref.getBases()));
|
||||
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
|
||||
refAllele = Allele.create(refBases,true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
boolean ok = true;
|
||||
for (int i=0; i < refBases.length; i++)
|
||||
if (!BaseUtils.isRegularBase(refBases[i]))
|
||||
ok = false;
|
||||
|
||||
if (ok) {
|
||||
refAllele = Allele.create(refBases,true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// insertion case
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(bestAltAllele, false);
|
||||
boolean ok = true;
|
||||
for (int i=0; i < bestAltAllele.length(); i++)
|
||||
if (!BaseUtils.isRegularBase(bestAltAllele.getBytes()[i]))
|
||||
ok = false;
|
||||
if (ok) {
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(bestAltAllele, false);
|
||||
}
|
||||
}
|
||||
if (refAllele != null && altAllele != null) {
|
||||
aList.add(0,refAllele);
|
||||
aList.add(1,altAllele);
|
||||
}
|
||||
|
||||
aList.add(0,refAllele);
|
||||
aList.add(1,altAllele);
|
||||
|
||||
return aList;
|
||||
|
||||
}
|
||||
|
|
@ -310,6 +325,9 @@ public class DindelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoo
|
|||
if ( !(priors instanceof DiploidIndelGenotypePriors) )
|
||||
throw new StingException("Only diploid-based Indel priors are supported in the DINDEL GL model");
|
||||
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
|
||||
refAllele = alleleList.get(0);
|
||||
altAllele = alleleList.get(1);
|
||||
int eventLength = refAllele.getBaseString().length() - altAllele.getBaseString().length();
|
||||
|
|
|
|||
|
|
@ -114,6 +114,8 @@ public class UnifiedArgumentCollection {
|
|||
|
||||
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
|
||||
public int INDEL_HAPLOTYPE_SIZE = 80;
|
||||
@Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false)
|
||||
public boolean DO_CONTEXT_DEPENDENT_PENALTIES = false;
|
||||
//gdebug+
|
||||
@Hidden
|
||||
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
|
||||
|
|
@ -167,7 +169,8 @@ public class UnifiedArgumentCollection {
|
|||
uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
|
||||
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
|
||||
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
|
||||
|
||||
uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES;
|
||||
|
||||
// todo- arguments to remove
|
||||
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
|
||||
uac.INSERTION_START_PROBABILITY = INSERTION_START_PROBABILITY;
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -95,6 +96,7 @@ public class PairHMMIndelErrorModel {
|
|||
private boolean doViterbi = false;
|
||||
private final boolean useSoftClippedBases = false;
|
||||
private final boolean useAffineGapModel = true;
|
||||
private boolean doContextDependentPenalties = false;
|
||||
|
||||
private String s1;
|
||||
private String s2;
|
||||
|
|
@ -117,19 +119,19 @@ public class PairHMMIndelErrorModel {
|
|||
}
|
||||
}
|
||||
|
||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, String s1, String s2, boolean dovit) {
|
||||
this(indelGOP, indelGCP, deb);
|
||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, String s1, String s2, boolean dovit) {
|
||||
this(indelGOP, indelGCP, deb, doCDP);
|
||||
this.s1 = s1;
|
||||
this.s2 = s2;
|
||||
this.doViterbi = dovit;
|
||||
}
|
||||
|
||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb) {
|
||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) {
|
||||
|
||||
|
||||
this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob
|
||||
this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob
|
||||
|
||||
this.doContextDependentPenalties = doCDP;
|
||||
this.DEBUG = deb;
|
||||
|
||||
|
||||
|
|
@ -264,6 +266,36 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
}
|
||||
|
||||
private Pair<Double,Double> getGapPenalties(final int indI, final int indJ, final int X_METRIC_LENGTH,
|
||||
final int Y_METRIC_LENGTH, final int tableToUpdate) {
|
||||
|
||||
double c=0.0,d=0.0;
|
||||
|
||||
if (doContextDependentPenalties) {
|
||||
// todo- fill me!!
|
||||
} else {
|
||||
switch (tableToUpdate) {
|
||||
case MATCH_OFFSET:
|
||||
|
||||
break;
|
||||
case X_OFFSET:
|
||||
c = (indJ==Y_METRIC_LENGTH-1? 0: logGapOpenProbability);
|
||||
d = (indJ==Y_METRIC_LENGTH-1? 0: logGapContinuationProbability);
|
||||
|
||||
break;
|
||||
|
||||
case Y_OFFSET:
|
||||
c = (indI==X_METRIC_LENGTH-1? 0: logGapOpenProbability);
|
||||
d = (indI==X_METRIC_LENGTH-1? 0: logGapContinuationProbability);
|
||||
|
||||
break;
|
||||
default:
|
||||
throw new StingException("BUG!! Invalid table offset");
|
||||
}
|
||||
}
|
||||
return new Pair<Double,Double>(Double.valueOf(c),Double.valueOf(d));
|
||||
}
|
||||
|
||||
public double computeReadLikelihoodGivenHaplotypeAffineGaps(byte[] haplotypeBases, byte[] readBases, byte[] readQuals) {
|
||||
|
||||
final int X_METRIC_LENGTH = readBases.length+1;
|
||||
|
|
@ -318,7 +350,6 @@ public class PairHMMIndelErrorModel {
|
|||
double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
|
||||
|
||||
|
||||
double c,d;
|
||||
double[] metrics = new double[3];
|
||||
|
||||
// update match array
|
||||
|
|
@ -339,10 +370,11 @@ public class PairHMMIndelErrorModel {
|
|||
// update X array
|
||||
// State X(i,j): X(1:i) aligned to a gap in Y(1:j).
|
||||
// When in last column of X, ie X(1:i) aligned to full Y, we don't want to penalize gaps
|
||||
c = (indJ==Y_METRIC_LENGTH-1? 0: logGapOpenProbability);
|
||||
d = (indJ==Y_METRIC_LENGTH-1? 0: logGapContinuationProbability);
|
||||
metrics[MATCH_OFFSET] = matchMetricArray[indI-1][indJ] + c;
|
||||
metrics[X_OFFSET] = XMetricArray[indI-1][indJ] + d;
|
||||
Pair<Double,Double> p = getGapPenalties(indI, indJ, X_METRIC_LENGTH,
|
||||
Y_METRIC_LENGTH, X_OFFSET);
|
||||
|
||||
metrics[MATCH_OFFSET] = matchMetricArray[indI-1][indJ] + p.first;
|
||||
metrics[X_OFFSET] = XMetricArray[indI-1][indJ] + p.second;
|
||||
metrics[Y_OFFSET] = Double.NEGATIVE_INFINITY; //YMetricArray[indI-1][indJ] + logGapOpenProbability;
|
||||
|
||||
if (doViterbi) {
|
||||
|
|
@ -356,12 +388,12 @@ public class PairHMMIndelErrorModel {
|
|||
bestActionArrayX[indI][indJ] = ACTIONS_X[bestMetricIdx];
|
||||
|
||||
// update Y array
|
||||
c = (indI==X_METRIC_LENGTH-1? 0: logGapOpenProbability);
|
||||
d = (indI==X_METRIC_LENGTH-1? 0: logGapContinuationProbability);
|
||||
p = getGapPenalties(indI, indJ, X_METRIC_LENGTH,
|
||||
Y_METRIC_LENGTH, Y_OFFSET);
|
||||
|
||||
metrics[MATCH_OFFSET] = matchMetricArray[indI][indJ-1] + c;
|
||||
metrics[MATCH_OFFSET] = matchMetricArray[indI][indJ-1] + p.first;
|
||||
metrics[X_OFFSET] = Double.NEGATIVE_INFINITY; //XMetricArray[indI][indJ-1] + logGapOpenProbability;
|
||||
metrics[Y_OFFSET] = YMetricArray[indI][indJ-1] + d;
|
||||
metrics[Y_OFFSET] = YMetricArray[indI][indJ-1] + p.second;
|
||||
|
||||
if (doViterbi) {
|
||||
bestMetricIdx = MathUtils.maxElementIndex(metrics);
|
||||
|
|
@ -437,11 +469,7 @@ public class PairHMMIndelErrorModel {
|
|||
i--; j--;
|
||||
}
|
||||
|
||||
/* if (j<0 || i < 0)
|
||||
{
|
||||
int k=1;
|
||||
}*/
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -687,87 +715,5 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
}
|
||||
|
||||
/* Retrieves bases and qualities from a read that align to a start and stop position within a given contig */
|
||||
public static Pair<byte[],byte[]> getBasesBetweenPositions(SAMRecord read, long startPos, long endPos ) {
|
||||
|
||||
final Cigar cigar = read.getCigar();
|
||||
final byte[] bases = read.getReadBases();
|
||||
final byte[] quals = read.getBaseQualities();
|
||||
// final byte[] alignment = new byte[alignmentLength];
|
||||
long alignStartPos = startPos;
|
||||
long readStartPos = read.getUnclippedStart();
|
||||
|
||||
if (alignStartPos < readStartPos )
|
||||
alignStartPos = readStartPos;
|
||||
|
||||
long alignEndPos = endPos;
|
||||
long readEndPos = read.getUnclippedEnd();
|
||||
|
||||
if (alignEndPos > readEndPos )
|
||||
alignEndPos = readEndPos;
|
||||
|
||||
// We need to now locate read bases between alignStartPos and alignEndPos
|
||||
|
||||
//first see how many bytes will we require for output
|
||||
final byte[] readAlignment = new byte[bases.length];
|
||||
final byte[] qualAlignment = new byte[bases.length];
|
||||
|
||||
int outPos = 0;
|
||||
int readPos = 0;
|
||||
int alignPos = 0;
|
||||
for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) {
|
||||
|
||||
final CigarElement ce = cigar.getCigarElement(iii);
|
||||
final int elementLength = ce.getLength();
|
||||
|
||||
switch( ce.getOperator() ) {
|
||||
case I:
|
||||
for ( int jjj = 0; jjj < elementLength; jjj++ ) {
|
||||
// check if insertion falls before window we're looking for
|
||||
if (readStartPos +readPos >= alignStartPos) {
|
||||
readAlignment[outPos] = bases[readPos];
|
||||
qualAlignment[outPos] = quals[readPos];
|
||||
outPos++;
|
||||
}
|
||||
readPos++;
|
||||
}
|
||||
break;
|
||||
case D:
|
||||
case N:
|
||||
for ( int jjj = 0; jjj < elementLength; jjj++ ) {
|
||||
alignPos++;
|
||||
if (alignStartPos + alignPos > alignEndPos)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case S:
|
||||
case M:
|
||||
for ( int jjj = 0; jjj < elementLength; jjj++ ) {
|
||||
if (readStartPos +readPos >= alignStartPos) {
|
||||
readAlignment[outPos] = bases[readPos];
|
||||
qualAlignment[outPos] = quals[readPos];
|
||||
outPos++;
|
||||
alignPos++;
|
||||
}
|
||||
|
||||
readPos++;
|
||||
|
||||
if (alignStartPos + alignPos > alignEndPos)
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case H:
|
||||
case P:
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException( "Unsupported cigar operator: " + ce.getOperator() );
|
||||
}
|
||||
// if we're already beyong of edge of window of interest, nothing more to do
|
||||
if (alignStartPos + alignPos > alignEndPos)
|
||||
break;
|
||||
|
||||
}
|
||||
return new Pair(Arrays.copyOf(readAlignment,outPos), Arrays.copyOf(qualAlignment,outPos));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue