diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java index f8c9c5396..9809709a8 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompareBAM.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; @@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.util.HashMap; import java.util.Map; @@ -39,6 +41,7 @@ import java.util.Map; * @since 10/30/11 */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) public class CompareBAM extends LocusWalker, CompareBAM.TestResults> { @Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 678a65024..2787689b5 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -321,7 +321,7 @@ public class GenotypingEngine { } protected void mergeConsecutiveEventsBasedOnLD( final ArrayList haplotypes, final TreeSet startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) { - final int MAX_SIZE_TO_COMBINE = 10; + final int MAX_SIZE_TO_COMBINE = 15; final double MERGE_EVENTS_R2_THRESHOLD = 0.95; if( startPosKeySet.size() <= 1 ) { return; } @@ -338,10 +338,10 @@ public class GenotypingEngine { boolean isBiallelic = true; VariantContext thisVC = null; VariantContext nextVC = null; - int x11 = 0; - int x12 = 0; - int x21 = 0; - int x22 = 0; + double x11 = Double.NEGATIVE_INFINITY; + double x12 = Double.NEGATIVE_INFINITY; + double x21 = Double.NEGATIVE_INFINITY; + double x22 = Double.NEGATIVE_INFINITY; for( final Haplotype h : haplotypes ) { // only make complex substitutions out of consecutive biallelic sites @@ -364,21 +364,24 @@ public class GenotypingEngine { } } // count up the co-occurrences of the events for the R^2 calculation - // BUGBUG: use haplotype likelihoods per-sample to make this more accurate - if( thisHapVC == null ) { - if( nextHapVC == null ) { x11++; } - else { x12++; } - } else { - if( nextHapVC == null ) { x21++; } - else { x22++; } + final ArrayList haplotypeList = new ArrayList(); + haplotypeList.add(h); + for( final String sample : haplotypes.get(0).getSampleKeySet() ) { + final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0]; + if( thisHapVC == null ) { + if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); } + else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); } + } else { + if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); } + else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); } + } } } if( thisVC == null || nextVC == null ) { continue; - //throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart); } if( isBiallelic ) { - final double R2 = calculateR2LD( x11, x12, x21, x22 ); + final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) ); if( DEBUG ) { System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2)); System.out.println("-- " + thisVC); @@ -423,20 +426,21 @@ public class GenotypingEngine { protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) { final int thisStart = thisVC.getStart(); final int nextStart = nextVC.getStart(); - byte[] refBases = ( new byte[]{} ); - byte[] altBases = ( new byte[]{} ); + byte[] refBases = new byte[]{}; + byte[] altBases = new byte[]{}; refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases()); altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases()); - for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) { + int locus; + for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) { final byte refByte = ref[locus - refLoc.getStart()]; refBases = ArrayUtils.add(refBases, refByte); altBases = ArrayUtils.add(altBases, refByte); } - refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases()); + refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases()); int iii = 0; - if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele + if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; } } final ArrayList mergedAlleles = new ArrayList(); @@ -445,12 +449,11 @@ public class GenotypingEngine { return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make(); } - @Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"}) - protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) { - final int total = x11 + x12 + x21 + x22; - final double pa1b1 = ((double) x11) / ((double) total); - final double pa1b2 = ((double) x12) / ((double) total); - final double pa2b1 = ((double) x21) / ((double) total); + protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) { + final double total = x11 + x12 + x21 + x22; + final double pa1b1 = x11 / total; + final double pa1b2 = x12 / total; + final double pa2b1 = x21 / total; final double pa1 = pa1b1 + pa1b2; final double pb1 = pa1b1 + pa2b1; return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) ); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index f5f707690..1130feaea 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; import net.sf.picard.reference.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; @@ -56,6 +57,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fragments.FragmentCollection; import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.variantcontext.*; @@ -103,7 +105,7 @@ import java.util.*; @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.LOCUS) -@ActiveRegionExtension(extension=65, maxRegion=275) +@ActiveRegionExtension(extension=65, maxRegion=300) public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { /** @@ -303,8 +305,8 @@ public class HaplotypeCaller extends ActiveRegionWalker implem public boolean wantsNonPrimaryReads() { return true; } @Override - @Ensures({"result >= 0.0", "result <= 1.0"}) - public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { + @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"}) + public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) { @@ -313,21 +315,22 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } } if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) { - return 1.0; + return new ActivityProfileResult(1.0); } } if( USE_ALLELES_TRIGGER ) { - return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); + return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); } - if( context == null ) { return 0.0; } + if( context == null ) { return new ActivityProfileResult(0.0); } final List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied noCall.add(Allele.NO_CALL); final Map splitContexts = AlignmentContextUtils.splitContextBySampleName(context); final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size()); + final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage(); for( final String sample : splitContexts.keySet() ) { final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event) Arrays.fill(genotypeLikelihoods, 0.0); @@ -348,6 +351,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) { AA = 2; BB = 0; + if( p.isNextToSoftClip() ) { + averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28)); + } } } genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual); @@ -362,7 +368,9 @@ public class HaplotypeCaller extends ActiveRegionWalker implem alleles.add( FAKE_REF_ALLELE ); alleles.add( FAKE_ALT_ALLELE ); final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); - return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) ); + final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); + + return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); } //--------------------------------------------------------------------------------------------------------------- @@ -407,7 +415,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList ); // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes ) - final ArrayList bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes ); + final ArrayList bestHaplotypes = haplotypes;// ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes ); for( final Pair>> callResult : ( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES @@ -488,7 +496,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem //--------------------------------------------------------------------------------------------------------------- private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { - if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); } + if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); } final ArrayList finalizedReadList = new ArrayList(); final FragmentCollection fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) ); activeRegion.clearReads(); @@ -517,7 +525,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem private List filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { final ArrayList readsToRemove = new ArrayList(); for( final GATKSAMRecord rec : activeRegion.getReads() ) { - if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { + if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) { readsToRemove.add(rec); } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java index 8d1ffa0a0..38f11cc5d 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeResolver.java @@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder; @@ -80,6 +82,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) @Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW)) public class HaplotypeResolver extends RodWalker { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 535508d09..365459882 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -103,7 +103,7 @@ public class LikelihoodCalculationEngine { readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated - readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); + readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); } for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { @@ -311,7 +311,7 @@ public class LikelihoodCalculationEngine { int hap1 = 0; int hap2 = 0; //double bestElement = Double.NEGATIVE_INFINITY; - final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 ); + final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 ); while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) { double maxElement = Double.NEGATIVE_INFINITY; for( int iii = 0; iii < numHaplotypes; iii++ ) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java index 4bcf5a0a0..539190fe9 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngineUnitTest.java @@ -353,6 +353,16 @@ public class GenotypingEngineUnitTest extends BaseTest { Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + // deletion + insertion (abutting) + thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make(); + nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make(); + truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make(); + mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc); + logger.warn(truthVC + " == " + mergedVC); + Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC)); + Assert.assertEquals(truthVC.getStart(), mergedVC.getStart()); + Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd()); + // complex + complex thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make(); nextVC = new VariantContextBuilder().loc("2", 1706, 1707).alleles("GG","AC").make(); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 9b8d1b3d7..17ad37deb 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -20,20 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "7b4e76934e0c911220b4e7da8776ab2b"); + HCTest(CEUTRIO_BAM, "", "eff4c820226abafcaa058c66585198a7"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "fcf0cea98a571d5e2d1dfa8b5edc599d"); + HCTest(NA12878_BAM, "", "2b40b314e6e63ae165186b55b14eee41"); } @Test public void testHaplotypeCallerMultiSampleGGA() { - // TODO -- Ryan, do you know why the md5s changed just for the rank sum tests? - final String RyansMd5 = "ff370c42c8b09a29f1aeff5ac57c7ea6"; - final String EricsMd5 = "d8317f4589e8e0c48bcd087cdb75ce88"; - HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", EricsMd5); + HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "553870cc4d7e66f30862f8ae5dee01ff"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -44,8 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b"); + HCTestComplexVariants(CEUTRIO_BAM, "", "0936c41e8f006174f7cf27d97235133e"); } - } diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java index 7af83d14b..e8eea5ff0 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentValidation.java @@ -29,11 +29,13 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.Iterator; @@ -46,6 +48,7 @@ import java.util.Iterator; * @author mhanna * @version 0.1 */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class AlignmentValidation extends ReadWalker { /** * The supporting BWT index generated using BWT. diff --git a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java index c8554573b..6206fc2ce 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java +++ b/public/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.File; @@ -50,6 +52,7 @@ import java.io.File; * @author mhanna * @version 0.1 */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @WalkerName("Align") public class AlignmentWalker extends ReadWalker { @Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " + diff --git a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java index b0580fe50..336c95d42 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java +++ b/public/java/src/org/broadinstitute/sting/alignment/CountBestAlignments.java @@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.io.PrintStream; @@ -48,6 +50,7 @@ import java.util.TreeMap; * @author mhanna * @version 0.1 */ +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class CountBestAlignments extends ReadWalker { /** * The supporting BWT index generated using BWT. diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java index b1ad19e69..306ebdd0e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java +++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java @@ -131,6 +131,12 @@ public class CommandLineGATK extends CommandLineExecutable { // can't close tribble index when writing if ( message.indexOf("Unable to close index for") != -1 ) exitSystemWithUserError(new UserException(t.getCause().getMessage())); + + // disk is full + if ( message.indexOf("No space left on device") != -1 ) + exitSystemWithUserError(new UserException(t.getMessage())); + if ( t.getCause().getMessage().indexOf("No space left on device") != -1 ) + exitSystemWithUserError(new UserException(t.getCause().getMessage())); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java index 5dbd90405..6780311bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/CoverageBySample.java @@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples; import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -17,8 +19,9 @@ import java.util.List; import java.util.Map; /** - * Computes the coverage per sample. + * Computes the coverage per sample for every position (use with -L argument!). */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class CoverageBySample extends LocusWalker { @Output protected PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java index 3069ee528..6482354a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java +++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKPaperGenotyper.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype; import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.PrintStream; @@ -46,6 +48,7 @@ import java.io.PrintStream; * * @author aaron */ +@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) public class GATKPaperGenotyper extends LocusWalker implements TreeReducible { // the possible diploid genotype strings private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT } diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index 6ff9f3bd5..7d035d208 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -279,7 +279,6 @@ public class LocusIteratorByState extends LocusIterator { */ private void lazyLoadNextAlignmentContext() { while (nextAlignmentContext == null && readStates.hasNext()) { - // this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref: readStates.collectPendingReads(); final GenomeLoc location = getLocation(); @@ -378,7 +377,7 @@ public class LocusIteratorByState extends LocusIterator { CigarOperator op = state.stepForwardOnGenome(); if (op == null) { // we discard the read only when we are past its end AND indel at the end of the read (if any) was - // already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe + // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag. it.remove(); // we've stepped off the end of the object } diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 1b9c12fb0..845c4eacf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActivityProfile; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -69,8 +70,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine walker, + private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker walker, final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext locus, final GenomeLoc location) { if ( walker.hasPresetActiveRegions() ) { - return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0; + return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); } else { return walker.isActive( tracker, refContext, locus ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index e38e166ea..b2975cbbf 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; @@ -72,7 +73,7 @@ public abstract class ActiveRegionWalker extends Walker { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 8c4cab877..0c856c6df 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -33,9 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; +@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.CONTIG) @ActiveRegionExtension(extension = 0, maxRegion = 50000) public class FindCoveredIntervals extends ActiveRegionWalker { @@ -44,13 +48,13 @@ public class FindCoveredIntervals extends ActiveRegionWalker { @Override // Look to see if the region has sufficient coverage - public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { + public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); // note the linear probability scale int coverageThreshold = 20; - return Math.min((double) depth / coverageThreshold, 1); + return new ActivityProfileResult(Math.min((double) depth / coverageThreshold, 1)); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java index 8a1af623e..6beade070 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaStats.java @@ -26,17 +26,20 @@ package org.broadinstitute.sting.gatk.walkers.fasta; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RefWalker; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; /** * Calculates basic statistics about the reference sequence itself */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class FastaStats extends RefWalker { @Output PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java index 67ecea1f0..9915d617e 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; import java.util.*; @@ -70,6 +72,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} ) public class CountRODs extends RodWalker, Long>> implements TreeReducible, Long>> { @Output public PrintStream out; diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index e024253c9..1f5eaefee 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -1596,7 +1596,17 @@ public class MathUtils { result += v1[k].doubleValue() * v2[k].doubleValue(); return result; + } + public static double dotProduct(double[] v1, double[] v2) { + if (v1.length != v2.length) + throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()"); + + double result = 0.0; + for (int k = 0; k < v1.length; k++) + result += v1[k] * v2[k]; + + return result; } public static double[] vectorLog10(double v1[]) { diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 4333e471e..7e736b7bf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -46,15 +45,15 @@ public class ActivityProfile { final GenomeLocParser parser; final boolean presetRegions; GenomeLoc regionStartLoc = null; - final List isActiveList; + final List isActiveList; private GenomeLoc lastLoc = null; - private static final int FILTER_SIZE = 65; - private static final Double[] GaussianKernel; + private static final int FILTER_SIZE = 80; + private static final double[] GaussianKernel; static { - GaussianKernel = new Double[2*FILTER_SIZE + 1]; + GaussianKernel = new double[2*FILTER_SIZE + 1]; for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { - GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii); + GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); } } @@ -63,22 +62,22 @@ public class ActivityProfile { // todo -- add unit tests // TODO -- own preset regions public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) { - this(parser, presetRegions, new ArrayList(), null); + this(parser, presetRegions, new ArrayList(), null); } - protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List isActiveList, final GenomeLoc regionStartLoc) { + protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List isActiveList, final GenomeLoc regionStartLoc) { this.parser = parser; this.presetRegions = presetRegions; this.isActiveList = isActiveList; this.regionStartLoc = regionStartLoc; } - public void add(final GenomeLoc loc, final double score) { + public void add(final GenomeLoc loc, final ActivityProfileResult result) { if ( loc.size() != 1 ) throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" ); if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 ) throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc); - isActiveList.add(score); + isActiveList.add(result); if( regionStartLoc == null ) { regionStartLoc = loc; } @@ -93,22 +92,43 @@ public class ActivityProfile { * @return a new ActivityProfile that's the band-pass filtered version of this profile */ public ActivityProfile bandPassFilter() { - final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]); - final Double[] filteredProbArray = new Double[activeProbArray.length]; + final double[] activeProbArray = new double[isActiveList.size()]; + int iii = 0; + for( final ActivityProfileResult result : isActiveList ) { + activeProbArray[iii++] = result.isActiveProb; + } + iii = 0; + for( final ActivityProfileResult result : isActiveList ) { + if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups + final int numHQClips = result.resultValue.intValue(); + for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) { + activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]); + } + } + iii++; + } + final double[] filteredProbArray = new double[activeProbArray.length]; if( !presetRegions ) { - for( int iii = 0; iii < activeProbArray.length; iii++ ) { - final Double[] kernel = (Double[]) ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); - final Double[] activeProbSubArray = (Double[]) ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); + for( iii = 0; iii < activeProbArray.length; iii++ ) { + final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); + final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); } } - return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc); + iii = 0; + for( final double prob : filteredProbArray ) { + final ActivityProfileResult result = isActiveList.get(iii++); + result.isActiveProb = prob; + result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE; + result.resultValue = null; + } + return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc); } /** * Partition this profile into active regions - * @param activeRegionExtension - * @return + * @param activeRegionExtension the amount of margin overlap in the active region + * @return the list of active regions */ public List createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) { final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author @@ -119,14 +139,14 @@ public class ActivityProfile { return Collections.emptyList(); } else if( isActiveList.size() == 1 ) { // there's a single element, it's either active or inactive - boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD; + boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize)); } else { // there are 2+ elements, divide these up into regions - boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD; + boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; int curStart = 0; for(int iii = 1; iii < isActiveList.size(); iii++ ) { - final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD; + final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD; if( isActive != thisStatus ) { returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize)); isActive = thisStatus; @@ -143,7 +163,7 @@ public class ActivityProfile { * @param isActive should the region be active? * @param curStart offset (0-based) from the start of this region * @param curEnd offset (0-based) from the start of this region - * @param activeRegionExtension + * @param activeRegionExtension the amount of margin overlap in the active region * @return a fully initialized ActiveRegion with the above properties */ private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { @@ -160,8 +180,8 @@ public class ActivityProfile { int cutPoint = -1; final int size = curEnd - curStart + 1; - for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) { - if( isActiveList.get(iii) < minProb ) { minProb = isActiveList.get(iii); cutPoint = iii; } + for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) { + if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; } } final List leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList()); final List rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java new file mode 100644 index 000000000..8dc29aa3c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java @@ -0,0 +1,31 @@ +package org.broadinstitute.sting.utils.activeregion; + +/** + * Created with IntelliJ IDEA. + * User: rpoplin + * Date: 7/27/12 + */ + +public class ActivityProfileResult { + public double isActiveProb; + public ActivityProfileResultState resultState; + public Number resultValue; + + public enum ActivityProfileResultState { + NONE, + HIGH_QUALITY_SOFT_CLIPS + } + + public ActivityProfileResult( final double isActiveProb ) { + this.isActiveProb = isActiveProb; + this.resultState = ActivityProfileResultState.NONE; + this.resultValue = null; + } + + public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { + this.isActiveProb = isActiveProb; + this.resultState = resultState; + this.resultValue = resultValue; + } + +} diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index e5e747c2d..895de3578 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.walkers.bqsr.EventType; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -405,6 +406,40 @@ public class AlignmentUtils { return alignment; } + public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) { + + int numHQSoftClips = 0; + int alignPos = 0; + final Cigar cigar = read.getCigar(); + final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION ); + + for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) { + + final CigarElement ce = cigar.getCigarElement(iii); + final int elementLength = ce.getLength(); + + switch( ce.getOperator() ) { + case S: + for( int jjj = 0; jjj < elementLength; jjj++ ) { + if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; } + } + break; + case M: + case I: + alignPos += elementLength; + break; + case H: + case P: + case D: + case N: + break; + default: + throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator()); + } + } + return numHQSoftClips; + } + public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) { return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus ); } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 282f19d8a..f7c564c74 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -123,12 +123,12 @@ public class ActivityProfileUnitTest extends BaseTest { for ( int i = 0; i < cfg.probs.size(); i++ ) { double p = cfg.probs.get(i); GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); - profile.add(loc, p); + profile.add(loc, new ActivityProfileResult(p)); } Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); Assert.assertEquals(profile.size(), cfg.probs.size()); - Assert.assertEquals(profile.isActiveList, cfg.probs); + assertProbsAreEqual(profile.isActiveList, cfg.probs); assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions); } @@ -140,5 +140,12 @@ public class ActivityProfileUnitTest extends BaseTest { } } + private void assertProbsAreEqual(List actual, List expected) { + Assert.assertEquals(actual.size(), expected.size()); + for ( int i = 0; i < actual.size(); i++ ) { + Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i)); + } + } + // todo -- test extensions } \ No newline at end of file