HaplotypeCaller now uses an excessive number of high quality soft clips as a triggering signal in order to capture both end points of a large deletion in a single active region.
This commit is contained in:
parent
a0890126a8
commit
22bb4804f0
|
|
@ -317,7 +317,7 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
|
||||
final int MAX_SIZE_TO_COMBINE = 10;
|
||||
final int MAX_SIZE_TO_COMBINE = 15;
|
||||
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
|
||||
if( startPosKeySet.size() <= 1 ) { return; }
|
||||
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
|||
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -104,7 +105,7 @@ import java.util.*;
|
|||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@ActiveRegionExtension(extension=65, maxRegion=275)
|
||||
@ActiveRegionExtension(extension=65, maxRegion=300)
|
||||
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
|
||||
|
||||
/**
|
||||
|
|
@ -329,6 +330,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
|
||||
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
|
||||
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
|
||||
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
|
||||
for( final String sample : splitContexts.keySet() ) {
|
||||
final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
|
||||
Arrays.fill(genotypeLikelihoods, 0.0);
|
||||
|
|
@ -349,6 +351,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
|
||||
AA = 2;
|
||||
BB = 0;
|
||||
if( p.isNextToSoftClip() ) {
|
||||
averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
|
||||
}
|
||||
}
|
||||
}
|
||||
genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual);
|
||||
|
|
@ -363,7 +368,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
alleles.add( FAKE_REF_ALLELE );
|
||||
alleles.add( FAKE_ALT_ALLELE );
|
||||
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
return new ActivityProfileResult( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
|
||||
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
|
||||
|
||||
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ public class LikelihoodCalculationEngine {
|
|||
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
|
||||
readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
|
||||
readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
|
||||
}
|
||||
|
||||
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
|
||||
|
|
@ -311,7 +311,7 @@ public class LikelihoodCalculationEngine {
|
|||
int hap1 = 0;
|
||||
int hap2 = 0;
|
||||
//double bestElement = Double.NEGATIVE_INFINITY;
|
||||
final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 );
|
||||
final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 );
|
||||
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
|
||||
double maxElement = Double.NEGATIVE_INFINITY;
|
||||
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
||||
|
|
|
|||
|
|
@ -47,13 +47,13 @@ public class ActivityProfile {
|
|||
GenomeLoc regionStartLoc = null;
|
||||
final List<ActivityProfileResult> isActiveList;
|
||||
private GenomeLoc lastLoc = null;
|
||||
private static final int FILTER_SIZE = 65;
|
||||
private static final int FILTER_SIZE = 80;
|
||||
private static final double[] GaussianKernel;
|
||||
|
||||
static {
|
||||
GaussianKernel = new double[2*FILTER_SIZE + 1];
|
||||
for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) {
|
||||
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii);
|
||||
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -97,6 +97,16 @@ public class ActivityProfile {
|
|||
for( final ActivityProfileResult result : isActiveList ) {
|
||||
activeProbArray[iii++] = result.isActiveProb;
|
||||
}
|
||||
iii = 0;
|
||||
for( final ActivityProfileResult result : isActiveList ) {
|
||||
if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups
|
||||
final int numHQClips = result.resultValue.intValue();
|
||||
for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) {
|
||||
activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]);
|
||||
}
|
||||
}
|
||||
iii++;
|
||||
}
|
||||
final double[] filteredProbArray = new double[activeProbArray.length];
|
||||
if( !presetRegions ) {
|
||||
for( iii = 0; iii < activeProbArray.length; iii++ ) {
|
||||
|
|
@ -170,7 +180,7 @@ public class ActivityProfile {
|
|||
int cutPoint = -1;
|
||||
|
||||
final int size = curEnd - curStart + 1;
|
||||
for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) {
|
||||
for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) {
|
||||
if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; }
|
||||
}
|
||||
final List<ActiveRegion> leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -405,6 +406,40 @@ public class AlignmentUtils {
|
|||
return alignment;
|
||||
}
|
||||
|
||||
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
|
||||
|
||||
int numHQSoftClips = 0;
|
||||
int alignPos = 0;
|
||||
final Cigar cigar = read.getCigar();
|
||||
final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
|
||||
|
||||
for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) {
|
||||
|
||||
final CigarElement ce = cigar.getCigarElement(iii);
|
||||
final int elementLength = ce.getLength();
|
||||
|
||||
switch( ce.getOperator() ) {
|
||||
case S:
|
||||
for( int jjj = 0; jjj < elementLength; jjj++ ) {
|
||||
if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; }
|
||||
}
|
||||
break;
|
||||
case M:
|
||||
case I:
|
||||
alignPos += elementLength;
|
||||
break;
|
||||
case H:
|
||||
case P:
|
||||
case D:
|
||||
case N:
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
|
||||
}
|
||||
}
|
||||
return numHQSoftClips;
|
||||
}
|
||||
|
||||
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
|
||||
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus );
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue