HaplotypeCaller now use an excessive number of high quality soft clips as a triggering signal in order to capture both end points of a large deletion in a single active region.

This commit is contained in:
Ryan Poplin 2012-07-27 12:44:02 -04:00
parent a0890126a8
commit 22bb4804f0
5 changed files with 60 additions and 8 deletions

View File

@ -317,7 +317,7 @@ public class GenotypingEngine {
}
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
final int MAX_SIZE_TO_COMBINE = 10;
final int MAX_SIZE_TO_COMBINE = 15;
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
if( startPosKeySet.size() <= 1 ) { return; }

View File

@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -104,7 +105,7 @@ import java.util.*;
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.LOCUS)
@ActiveRegionExtension(extension=65, maxRegion=275)
@ActiveRegionExtension(extension=65, maxRegion=300)
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
/**
@ -329,6 +330,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
for( final String sample : splitContexts.keySet() ) {
final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
Arrays.fill(genotypeLikelihoods, 0.0);
@ -349,6 +351,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
AA = 2;
BB = 0;
if( p.isNextToSoftClip() ) {
averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
}
}
}
genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual);
@ -363,7 +368,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
alleles.add( FAKE_REF_ALLELE );
alleles.add( FAKE_ALT_ALLELE );
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
return new ActivityProfileResult( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
}
//---------------------------------------------------------------------------------------------------------------

View File

@ -103,7 +103,7 @@ public class LikelihoodCalculationEngine {
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
}
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
@ -311,7 +311,7 @@ public class LikelihoodCalculationEngine {
int hap1 = 0;
int hap2 = 0;
//double bestElement = Double.NEGATIVE_INFINITY;
final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 );
final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 );
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
double maxElement = Double.NEGATIVE_INFINITY;
for( int iii = 0; iii < numHaplotypes; iii++ ) {

View File

@ -47,13 +47,13 @@ public class ActivityProfile {
GenomeLoc regionStartLoc = null;
final List<ActivityProfileResult> isActiveList;
private GenomeLoc lastLoc = null;
private static final int FILTER_SIZE = 65;
private static final int FILTER_SIZE = 80;
private static final double[] GaussianKernel;
static {
GaussianKernel = new double[2*FILTER_SIZE + 1];
for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) {
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii);
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii);
}
}
@ -97,6 +97,16 @@ public class ActivityProfile {
for( final ActivityProfileResult result : isActiveList ) {
activeProbArray[iii++] = result.isActiveProb;
}
iii = 0;
for( final ActivityProfileResult result : isActiveList ) {
if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups
final int numHQClips = result.resultValue.intValue();
for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) {
activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]);
}
}
iii++;
}
final double[] filteredProbArray = new double[activeProbArray.length];
if( !presetRegions ) {
for( iii = 0; iii < activeProbArray.length; iii++ ) {
@ -170,7 +180,7 @@ public class ActivityProfile {
int cutPoint = -1;
final int size = curEnd - curStart + 1;
for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) {
for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) {
if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; }
}
final List<ActiveRegion> leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());

View File

@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -405,6 +406,40 @@ public class AlignmentUtils {
return alignment;
}
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
int numHQSoftClips = 0;
int alignPos = 0;
final Cigar cigar = read.getCigar();
final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) {
final CigarElement ce = cigar.getCigarElement(iii);
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
case S:
for( int jjj = 0; jjj < elementLength; jjj++ ) {
if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; }
}
break;
case M:
case I:
alignPos += elementLength;
break;
case H:
case P:
case D:
case N:
break;
default:
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
}
}
return numHQSoftClips;
}
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus );
}