Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2012-07-30 21:32:19 -04:00
commit e6b326c189
25 changed files with 242 additions and 83 deletions

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.HashMap;
import java.util.Map;
@ -39,6 +41,7 @@ import java.util.Map;
* @since 10/30/11
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> {
@Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;

View File

@ -321,7 +321,7 @@ public class GenotypingEngine {
}
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
final int MAX_SIZE_TO_COMBINE = 10;
final int MAX_SIZE_TO_COMBINE = 15;
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
if( startPosKeySet.size() <= 1 ) { return; }
@ -338,10 +338,10 @@ public class GenotypingEngine {
boolean isBiallelic = true;
VariantContext thisVC = null;
VariantContext nextVC = null;
int x11 = 0;
int x12 = 0;
int x21 = 0;
int x22 = 0;
double x11 = Double.NEGATIVE_INFINITY;
double x12 = Double.NEGATIVE_INFINITY;
double x21 = Double.NEGATIVE_INFINITY;
double x22 = Double.NEGATIVE_INFINITY;
for( final Haplotype h : haplotypes ) {
// only make complex substitutions out of consecutive biallelic sites
@ -364,21 +364,24 @@ public class GenotypingEngine {
}
}
// count up the co-occurrences of the events for the R^2 calculation
// BUGBUG: use haplotype likelihoods per-sample to make this more accurate
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11++; }
else { x12++; }
} else {
if( nextHapVC == null ) { x21++; }
else { x22++; }
final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
haplotypeList.add(h);
for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0];
if( thisHapVC == null ) {
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
} else {
if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); }
else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); }
}
}
}
if( thisVC == null || nextVC == null ) {
continue;
//throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart);
}
if( isBiallelic ) {
final double R2 = calculateR2LD( x11, x12, x21, x22 );
final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) );
if( DEBUG ) {
System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
System.out.println("-- " + thisVC);
@ -423,20 +426,21 @@ public class GenotypingEngine {
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
final int thisStart = thisVC.getStart();
final int nextStart = nextVC.getStart();
byte[] refBases = ( new byte[]{} );
byte[] altBases = ( new byte[]{} );
byte[] refBases = new byte[]{};
byte[] altBases = new byte[]{};
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
int locus;
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
final byte refByte = ref[locus - refLoc.getStart()];
refBases = ArrayUtils.add(refBases, refByte);
altBases = ArrayUtils.add(altBases, refByte);
}
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
int iii = 0;
if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
}
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
@ -445,12 +449,11 @@ public class GenotypingEngine {
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make();
}
@Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"})
protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) {
final int total = x11 + x12 + x21 + x22;
final double pa1b1 = ((double) x11) / ((double) total);
final double pa1b2 = ((double) x12) / ((double) total);
final double pa2b1 = ((double) x21) / ((double) total);
protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) {
final double total = x11 + x12 + x21 + x22;
final double pa1b1 = x11 / total;
final double pa1b2 = x12 / total;
final double pa2b1 = x21 / total;
final double pa1 = pa1b1 + pa1b2;
final double pb1 = pa1b1 + pa2b1;
return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) );

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.CommandLineGATK;
@ -56,6 +57,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -103,7 +105,7 @@ import java.util.*;
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.LOCUS)
@ActiveRegionExtension(extension=65, maxRegion=275)
@ActiveRegionExtension(extension=65, maxRegion=300)
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
/**
@ -303,8 +305,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
public boolean wantsNonPrimaryReads() { return true; }
@Override
@Ensures({"result >= 0.0", "result <= 1.0"})
public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
@ -313,21 +315,22 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
}
}
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
return 1.0;
return new ActivityProfileResult(1.0);
}
}
if( USE_ALLELES_TRIGGER ) {
return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
}
if( context == null ) { return 0.0; }
if( context == null ) { return new ActivityProfileResult(0.0); }
final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
noCall.add(Allele.NO_CALL);
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
for( final String sample : splitContexts.keySet() ) {
final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
Arrays.fill(genotypeLikelihoods, 0.0);
@ -348,6 +351,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
AA = 2;
BB = 0;
if( p.isNextToSoftClip() ) {
averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
}
}
}
genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual);
@ -362,7 +368,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
alleles.add( FAKE_REF_ALLELE );
alleles.add( FAKE_ALT_ALLELE );
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
}
//---------------------------------------------------------------------------------------------------------------
@ -407,7 +415,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList );
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
final ArrayList<Haplotype> bestHaplotypes = haplotypes;// ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
@ -488,7 +496,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
//---------------------------------------------------------------------------------------------------------------
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); }
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) );
activeRegion.clearReads();
@ -517,7 +525,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
readsToRemove.add(rec);
}
}

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
@ -80,6 +82,7 @@ import java.util.*;
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
public class HaplotypeResolver extends RodWalker<Integer, Integer> {

View File

@ -103,7 +103,7 @@ public class LikelihoodCalculationEngine {
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
}
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
@ -311,7 +311,7 @@ public class LikelihoodCalculationEngine {
int hap1 = 0;
int hap2 = 0;
//double bestElement = Double.NEGATIVE_INFINITY;
final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 );
final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 );
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
double maxElement = Double.NEGATIVE_INFINITY;
for( int iii = 0; iii < numHaplotypes; iii++ ) {

View File

@ -353,6 +353,16 @@ public class GenotypingEngineUnitTest extends BaseTest {
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
// deletion + insertion (abutting)
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
logger.warn(truthVC + " == " + mergedVC);
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
// complex + complex
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
nextVC = new VariantContextBuilder().loc("2", 1706, 1707).alleles("GG","AC").make();

View File

@ -20,20 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSample() {
HCTest(CEUTRIO_BAM, "", "7b4e76934e0c911220b4e7da8776ab2b");
HCTest(CEUTRIO_BAM, "", "eff4c820226abafcaa058c66585198a7");
}
@Test
public void testHaplotypeCallerSingleSample() {
HCTest(NA12878_BAM, "", "fcf0cea98a571d5e2d1dfa8b5edc599d");
HCTest(NA12878_BAM, "", "2b40b314e6e63ae165186b55b14eee41");
}
@Test
public void testHaplotypeCallerMultiSampleGGA() {
// TODO -- Ryan, do you know why the md5s changed just for the rank sum tests?
final String RyansMd5 = "ff370c42c8b09a29f1aeff5ac57c7ea6";
final String EricsMd5 = "d8317f4589e8e0c48bcd087cdb75ce88";
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", EricsMd5);
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "553870cc4d7e66f30862f8ae5dee01ff");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -44,8 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleComplex() {
HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b");
HCTestComplexVariants(CEUTRIO_BAM, "", "0936c41e8f006174f7cf27d97235133e");
}
}

View File

@ -29,11 +29,13 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
@ -46,6 +48,7 @@ import java.util.Iterator;
* @author mhanna
* @version 0.1
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class AlignmentValidation extends ReadWalker<Integer,Integer> {
/**
* The supporting BWT index generated using BWT.

View File

@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -50,6 +52,7 @@ import java.io.File;
* @author mhanna
* @version 0.1
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@WalkerName("Align")
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +

View File

@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
@ -48,6 +50,7 @@ import java.util.TreeMap;
* @author mhanna
* @version 0.1
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CountBestAlignments extends ReadWalker<Integer,Integer> {
/**
* The supporting BWT index generated using BWT.

View File

@ -131,6 +131,12 @@ public class CommandLineGATK extends CommandLineExecutable {
// can't close tribble index when writing
if ( message.indexOf("Unable to close index for") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
// disk is full
if ( message.indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getMessage()));
if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
}
/**

View File

@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -17,8 +19,9 @@ import java.util.List;
import java.util.Map;
/**
* Computes the coverage per sample.
* Computes the coverage per sample for every position (use with -L argument!).
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CoverageBySample extends LocusWalker<Integer, Integer> {
@Output
protected PrintStream out;

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
@ -46,6 +48,7 @@ import java.io.PrintStream;
*
* @author aaron
*/
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
// the possible diploid genotype strings
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }

View File

@ -279,7 +279,6 @@ public class LocusIteratorByState extends LocusIterator {
*/
private void lazyLoadNextAlignmentContext() {
while (nextAlignmentContext == null && readStates.hasNext()) {
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
readStates.collectPendingReads();
final GenomeLoc location = getLocation();
@ -378,7 +377,7 @@ public class LocusIteratorByState extends LocusIterator {
CigarOperator op = state.stepForwardOnGenome();
if (op == null) {
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
it.remove(); // we've stepped off the end of the object
}

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -69,8 +70,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 );
profile.add(fakeLoc, isActiveProb);
profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
}
}
}
@ -86,8 +86,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
// Call the walkers isActive function for this locus and add them to the list to be integrated later
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location);
profile.add(location, isActiveProb);
profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
}
// Grab all the previously unseen reads from this pileup and add them to the massive read list
@ -144,11 +143,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
//
// --------------------------------------------------------------------------------
private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker,
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext locus, final GenomeLoc location) {
if ( walker.hasPresetActiveRegions() ) {
return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0;
return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
} else {
return walker.isActive( tracker, refContext, locus );
}

View File

@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
@ -72,7 +73,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
}
// Determine probability of active status over the AlignmentContext
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
// Map over the ActiveRegion
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
@ -80,6 +82,7 @@ import java.util.*;
* @author Mauricio Carneiro, Roger Zurawicki
* @since 5/8/12
*/
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@By(value = DataSource.READS)
@PartitionBy(PartitionType.INTERVAL)
public class DiagnoseTargets extends LocusWalker<Long, Long> {

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -33,9 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.CONTIG)
@ActiveRegionExtension(extension = 0, maxRegion = 50000)
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
@ -44,13 +48,13 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
@Override
// Look to see if the region has sufficient coverage
public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
// note the linear probability scale
int coverageThreshold = 20;
return Math.min((double) depth / coverageThreshold, 1);
return new ActivityProfileResult(Math.min((double) depth / coverageThreshold, 1));
}

View File

@ -26,17 +26,20 @@
package org.broadinstitute.sting.gatk.walkers.fasta;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
/**
* Calculates basic statistics about the reference sequence itself
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class FastaStats extends RefWalker<Byte, FastaStats.FastaStatistics> {
@Output PrintStream out;

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.io.PrintStream;
import java.util.*;
@ -70,6 +72,7 @@ import java.util.*;
* </pre>
*
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
@Output
public PrintStream out;

View File

@ -1596,7 +1596,17 @@ public class MathUtils {
result += v1[k].doubleValue() * v2[k].doubleValue();
return result;
}
public static double dotProduct(double[] v1, double[] v2) {
if (v1.length != v2.length)
throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()");
double result = 0.0;
for (int k = 0; k < v1.length; k++)
result += v1[k] * v2[k];
return result;
}
public static double[] vectorLog10(double v1[]) {

View File

@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -46,15 +45,15 @@ public class ActivityProfile {
final GenomeLocParser parser;
final boolean presetRegions;
GenomeLoc regionStartLoc = null;
final List<Double> isActiveList;
final List<ActivityProfileResult> isActiveList;
private GenomeLoc lastLoc = null;
private static final int FILTER_SIZE = 65;
private static final Double[] GaussianKernel;
private static final int FILTER_SIZE = 80;
private static final double[] GaussianKernel;
static {
GaussianKernel = new Double[2*FILTER_SIZE + 1];
GaussianKernel = new double[2*FILTER_SIZE + 1];
for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) {
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii);
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii);
}
}
@ -63,22 +62,22 @@ public class ActivityProfile {
// todo -- add unit tests
// TODO -- own preset regions
public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) {
this(parser, presetRegions, new ArrayList<Double>(), null);
this(parser, presetRegions, new ArrayList<ActivityProfileResult>(), null);
}
protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List<Double> isActiveList, final GenomeLoc regionStartLoc) {
protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List<ActivityProfileResult> isActiveList, final GenomeLoc regionStartLoc) {
this.parser = parser;
this.presetRegions = presetRegions;
this.isActiveList = isActiveList;
this.regionStartLoc = regionStartLoc;
}
public void add(final GenomeLoc loc, final double score) {
public void add(final GenomeLoc loc, final ActivityProfileResult result) {
if ( loc.size() != 1 )
throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" );
if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 )
throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc);
isActiveList.add(score);
isActiveList.add(result);
if( regionStartLoc == null ) {
regionStartLoc = loc;
}
@ -93,22 +92,43 @@ public class ActivityProfile {
* @return a new ActivityProfile that's the band-pass filtered version of this profile
*/
public ActivityProfile bandPassFilter() {
final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]);
final Double[] filteredProbArray = new Double[activeProbArray.length];
final double[] activeProbArray = new double[isActiveList.size()];
int iii = 0;
for( final ActivityProfileResult result : isActiveList ) {
activeProbArray[iii++] = result.isActiveProb;
}
iii = 0;
for( final ActivityProfileResult result : isActiveList ) {
if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups
final int numHQClips = result.resultValue.intValue();
for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) {
activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]);
}
}
iii++;
}
final double[] filteredProbArray = new double[activeProbArray.length];
if( !presetRegions ) {
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
final Double[] kernel = (Double[]) ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
final Double[] activeProbSubArray = (Double[]) ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
for( iii = 0; iii < activeProbArray.length; iii++ ) {
final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel);
}
}
return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc);
iii = 0;
for( final double prob : filteredProbArray ) {
final ActivityProfileResult result = isActiveList.get(iii++);
result.isActiveProb = prob;
result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE;
result.resultValue = null;
}
return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc);
}
/**
* Partition this profile into active regions
* @param activeRegionExtension
* @return
* @param activeRegionExtension the amount of margin overlap in the active region
* @return the list of active regions
*/
public List<ActiveRegion> createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) {
final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
@ -119,14 +139,14 @@ public class ActivityProfile {
return Collections.emptyList();
} else if( isActiveList.size() == 1 ) {
// there's a single element, it's either active or inactive
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize));
} else {
// there are 2+ elements, divide these up into regions
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
int curStart = 0;
for(int iii = 1; iii < isActiveList.size(); iii++ ) {
final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD;
final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD;
if( isActive != thisStatus ) {
returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize));
isActive = thisStatus;
@ -143,7 +163,7 @@ public class ActivityProfile {
* @param isActive should the region be active?
* @param curStart offset (0-based) from the start of this region
* @param curEnd offset (0-based) from the start of this region
* @param activeRegionExtension
* @param activeRegionExtension the amount of margin overlap in the active region
* @return a fully initialized ActiveRegion with the above properties
*/
private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) {
@ -160,8 +180,8 @@ public class ActivityProfile {
int cutPoint = -1;
final int size = curEnd - curStart + 1;
for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) {
if( isActiveList.get(iii) < minProb ) { minProb = isActiveList.get(iii); cutPoint = iii; }
for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) {
if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; }
}
final List<ActiveRegion> leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
final List<ActiveRegion> rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());

View File

@ -0,0 +1,31 @@
package org.broadinstitute.sting.utils.activeregion;
/**
* Created with IntelliJ IDEA.
* User: rpoplin
* Date: 7/27/12
*/
public class ActivityProfileResult {
public double isActiveProb;
public ActivityProfileResultState resultState;
public Number resultValue;
public enum ActivityProfileResultState {
NONE,
HIGH_QUALITY_SOFT_CLIPS
}
public ActivityProfileResult( final double isActiveProb ) {
this.isActiveProb = isActiveProb;
this.resultState = ActivityProfileResultState.NONE;
this.resultValue = null;
}
public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) {
this.isActiveProb = isActiveProb;
this.resultState = resultState;
this.resultValue = resultValue;
}
}

View File

@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -405,6 +406,40 @@ public class AlignmentUtils {
return alignment;
}
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
int numHQSoftClips = 0;
int alignPos = 0;
final Cigar cigar = read.getCigar();
final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) {
final CigarElement ce = cigar.getCigarElement(iii);
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
case S:
for( int jjj = 0; jjj < elementLength; jjj++ ) {
if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; }
}
break;
case M:
case I:
alignPos += elementLength;
break;
case H:
case P:
case D:
case N:
break;
default:
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
}
}
return numHQSoftClips;
}
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus );
}

View File

@ -123,12 +123,12 @@ public class ActivityProfileUnitTest extends BaseTest {
for ( int i = 0; i < cfg.probs.size(); i++ ) {
double p = cfg.probs.get(i);
GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i);
profile.add(loc, p);
profile.add(loc, new ActivityProfileResult(p));
}
Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));
Assert.assertEquals(profile.size(), cfg.probs.size());
Assert.assertEquals(profile.isActiveList, cfg.probs);
assertProbsAreEqual(profile.isActiveList, cfg.probs);
assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions);
}
@ -140,5 +140,12 @@ public class ActivityProfileUnitTest extends BaseTest {
}
}
private void assertProbsAreEqual(List<ActivityProfileResult> actual, List<Double> expected) {
Assert.assertEquals(actual.size(), expected.size());
for ( int i = 0; i < actual.size(); i++ ) {
Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i));
}
}
// todo -- test extensions
}