diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index e2445e926..6ea735ec0 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -317,7 +317,7 @@ public class GenotypingEngine { } protected void mergeConsecutiveEventsBasedOnLD( final ArrayList haplotypes, final TreeSet startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) { - final int MAX_SIZE_TO_COMBINE = 10; + final int MAX_SIZE_TO_COMBINE = 15; final double MERGE_EVENTS_R2_THRESHOLD = 0.95; if( startPosKeySet.size() <= 1 ) { return; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index a7c2ff23c..14ea17483 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -57,6 +57,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fragments.FragmentCollection; import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.*; @@ -104,7 +105,7 @@ import java.util.*; @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.LOCUS) -@ActiveRegionExtension(extension=65, maxRegion=275) +@ActiveRegionExtension(extension=65, maxRegion=300) public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { /** @@ -329,6 +330,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); + final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage(); for( final String sample : splitContexts.keySet() ) { final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event) Arrays.fill(genotypeLikelihoods, 0.0); @@ -349,6 +351,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) { AA = 2; BB = 0; + if( p.isNextToSoftClip() ) { + averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28)); + } } } genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual); @@ -363,7 +368,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem alleles.add( FAKE_REF_ALLELE ); alleles.add( FAKE_ALT_ALLELE ); final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); - return new ActivityProfileResult( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) ); + final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); + + return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 535508d09..365459882 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -103,7 +103,7 @@ public class LikelihoodCalculationEngine { readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated - readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); + readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); } for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { @@ -311,7 +311,7 @@ public class LikelihoodCalculationEngine { int hap1 = 0; int hap2 = 0; //double bestElement = Double.NEGATIVE_INFINITY; - final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 ); + final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 ); while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) { double maxElement = Double.NEGATIVE_INFINITY; for( int iii = 0; iii < numHaplotypes; iii++ ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 1c91551db..7e736b7bf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -47,13 +47,13 @@ public class ActivityProfile { GenomeLoc regionStartLoc = null; final List isActiveList; private GenomeLoc lastLoc = null; - private static final int FILTER_SIZE = 65; + private static final int FILTER_SIZE = 80; private static final double[] GaussianKernel; static { GaussianKernel = new double[2*FILTER_SIZE + 1]; for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { - GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii); + GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); } } @@ -97,6 +97,16 @@ public class ActivityProfile { for( final ActivityProfileResult result : isActiveList ) { activeProbArray[iii++] = result.isActiveProb; } + iii = 0; + for( final ActivityProfileResult result : isActiveList ) { + if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups + final int numHQClips = result.resultValue.intValue(); + for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) { + activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]); + } + } + iii++; + } final double[] filteredProbArray = new double[activeProbArray.length]; if( !presetRegions ) { for( iii = 0; iii < activeProbArray.length; iii++ ) { @@ -170,7 +180,7 @@ public class ActivityProfile { int cutPoint = -1; final int size = curEnd - curStart + 1; - for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) { + for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) { if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; } } final List leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList()); diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index e5e747c2d..895de3578 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -405,6 +406,40 @@ public class AlignmentUtils { return alignment; } + public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) { + + int numHQSoftClips = 0; + int alignPos = 0; + final Cigar cigar = read.getCigar(); + final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION ); + + for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) { + + final CigarElement ce = cigar.getCigarElement(iii); + final int elementLength = ce.getLength(); + + switch( ce.getOperator() ) { + case S: + for( int jjj = 0; jjj < elementLength; jjj++ ) { + if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; } + } + break; + case M: + case I: + alignPos += elementLength; + break; + case H: + case P: + case D: + case N: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + } + } + return numHQSoftClips; + } + public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) { return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus ); }