Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e6b326c189
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
||||
|
|
@ -11,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
|
@ -39,6 +41,7 @@ import java.util.Map;
|
|||
* @since 10/30/11
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
public class CompareBAM extends LocusWalker<Map<CompareBAM.TestName, Boolean>, CompareBAM.TestResults> {
|
||||
@Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;
|
||||
|
|
|
|||
|
|
@ -321,7 +321,7 @@ public class GenotypingEngine {
|
|||
}
|
||||
|
||||
protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
|
||||
final int MAX_SIZE_TO_COMBINE = 10;
|
||||
final int MAX_SIZE_TO_COMBINE = 15;
|
||||
final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
|
||||
if( startPosKeySet.size() <= 1 ) { return; }
|
||||
|
||||
|
|
@ -338,10 +338,10 @@ public class GenotypingEngine {
|
|||
boolean isBiallelic = true;
|
||||
VariantContext thisVC = null;
|
||||
VariantContext nextVC = null;
|
||||
int x11 = 0;
|
||||
int x12 = 0;
|
||||
int x21 = 0;
|
||||
int x22 = 0;
|
||||
double x11 = Double.NEGATIVE_INFINITY;
|
||||
double x12 = Double.NEGATIVE_INFINITY;
|
||||
double x21 = Double.NEGATIVE_INFINITY;
|
||||
double x22 = Double.NEGATIVE_INFINITY;
|
||||
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
// only make complex substitutions out of consecutive biallelic sites
|
||||
|
|
@ -364,21 +364,24 @@ public class GenotypingEngine {
|
|||
}
|
||||
}
|
||||
// count up the co-occurrences of the events for the R^2 calculation
|
||||
// BUGBUG: use haplotype likelihoods per-sample to make this more accurate
|
||||
if( thisHapVC == null ) {
|
||||
if( nextHapVC == null ) { x11++; }
|
||||
else { x12++; }
|
||||
} else {
|
||||
if( nextHapVC == null ) { x21++; }
|
||||
else { x22++; }
|
||||
final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
|
||||
haplotypeList.add(h);
|
||||
for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
|
||||
final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0];
|
||||
if( thisHapVC == null ) {
|
||||
if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
|
||||
else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
|
||||
} else {
|
||||
if( nextHapVC == null ) { x21 = MathUtils.approximateLog10SumLog10(x21, haplotypeLikelihood); }
|
||||
else { x22 = MathUtils.approximateLog10SumLog10(x22, haplotypeLikelihood); }
|
||||
}
|
||||
}
|
||||
}
|
||||
if( thisVC == null || nextVC == null ) {
|
||||
continue;
|
||||
//throw new ReviewedStingException("StartPos TreeSet has an entry for an event that is found on no haplotype. start pos = " + thisStart + ", next pos = " + nextStart);
|
||||
}
|
||||
if( isBiallelic ) {
|
||||
final double R2 = calculateR2LD( x11, x12, x21, x22 );
|
||||
final double R2 = calculateR2LD( Math.pow(10.0, x11), Math.pow(10.0, x12), Math.pow(10.0, x21), Math.pow(10.0, x22) );
|
||||
if( DEBUG ) {
|
||||
System.out.println("Found consecutive biallelic events with R^2 = " + String.format("%.4f", R2));
|
||||
System.out.println("-- " + thisVC);
|
||||
|
|
@ -423,20 +426,21 @@ public class GenotypingEngine {
|
|||
protected static VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
|
||||
final int thisStart = thisVC.getStart();
|
||||
final int nextStart = nextVC.getStart();
|
||||
byte[] refBases = ( new byte[]{} );
|
||||
byte[] altBases = ( new byte[]{} );
|
||||
byte[] refBases = new byte[]{};
|
||||
byte[] altBases = new byte[]{};
|
||||
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
|
||||
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
|
||||
for( int locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
|
||||
int locus;
|
||||
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
|
||||
final byte refByte = ref[locus - refLoc.getStart()];
|
||||
refBases = ArrayUtils.add(refBases, refByte);
|
||||
altBases = ArrayUtils.add(altBases, refByte);
|
||||
}
|
||||
refBases = ArrayUtils.addAll(refBases, nextVC.getReference().getBases());
|
||||
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
|
||||
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
|
||||
|
||||
int iii = 0;
|
||||
if( refBases.length == altBases.length ) { // special case of insertion + deletion of same length creates an MNP --> trim padding bases off the allele
|
||||
if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
|
||||
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
|
||||
}
|
||||
final ArrayList<Allele> mergedAlleles = new ArrayList<Allele>();
|
||||
|
|
@ -445,12 +449,11 @@ public class GenotypingEngine {
|
|||
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), mergedAlleles).make();
|
||||
}
|
||||
|
||||
@Requires({"x11 >= 0", "x12 >= 0", "x21 >= 0", "x22 >= 0"})
|
||||
protected static double calculateR2LD( final int x11, final int x12, final int x21, final int x22 ) {
|
||||
final int total = x11 + x12 + x21 + x22;
|
||||
final double pa1b1 = ((double) x11) / ((double) total);
|
||||
final double pa1b2 = ((double) x12) / ((double) total);
|
||||
final double pa2b1 = ((double) x21) / ((double) total);
|
||||
protected static double calculateR2LD( final double x11, final double x12, final double x21, final double x22 ) {
|
||||
final double total = x11 + x12 + x21 + x22;
|
||||
final double pa1b1 = x11 / total;
|
||||
final double pa1b2 = x12 / total;
|
||||
final double pa2b1 = x21 / total;
|
||||
final double pa1 = pa1b1 + pa1b2;
|
||||
final double pb1 = pa1b1 + pa2b1;
|
||||
return ((pa1b1 - pa1*pb1) * (pa1b1 - pa1*pb1)) / ( pa1 * (1.0 - pa1) * pb1 * (1.0 - pb1) );
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
|
|
@ -56,6 +57,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
|||
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -103,7 +105,7 @@ import java.util.*;
|
|||
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@ActiveRegionExtension(extension=65, maxRegion=275)
|
||||
@ActiveRegionExtension(extension=65, maxRegion=300)
|
||||
public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implements AnnotatorCompatible {
|
||||
|
||||
/**
|
||||
|
|
@ -303,8 +305,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
public boolean wantsNonPrimaryReads() { return true; }
|
||||
|
||||
@Override
|
||||
@Ensures({"result >= 0.0", "result <= 1.0"})
|
||||
public double isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
|
||||
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
|
||||
public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
|
||||
|
||||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) {
|
||||
|
|
@ -313,21 +315,22 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
}
|
||||
}
|
||||
if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
|
||||
return 1.0;
|
||||
return new ActivityProfileResult(1.0);
|
||||
}
|
||||
}
|
||||
|
||||
if( USE_ALLELES_TRIGGER ) {
|
||||
return ( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
|
||||
return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
|
||||
}
|
||||
|
||||
if( context == null ) { return 0.0; }
|
||||
if( context == null ) { return new ActivityProfileResult(0.0); }
|
||||
|
||||
final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
|
||||
noCall.add(Allele.NO_CALL);
|
||||
|
||||
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
|
||||
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
|
||||
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
|
||||
for( final String sample : splitContexts.keySet() ) {
|
||||
final double[] genotypeLikelihoods = new double[3]; // ref versus non-ref (any event)
|
||||
Arrays.fill(genotypeLikelihoods, 0.0);
|
||||
|
|
@ -348,6 +351,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
if( p.getBase() != ref.getBase() || p.isDeletion() || p.isBeforeDeletedBase() || p.isAfterDeletedBase() || p.isBeforeInsertion() || p.isAfterInsertion() || p.isNextToSoftClip() ) {
|
||||
AA = 2;
|
||||
BB = 0;
|
||||
if( p.isNextToSoftClip() ) {
|
||||
averageHQSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(p.getRead(), (byte) 28));
|
||||
}
|
||||
}
|
||||
}
|
||||
genotypeLikelihoods[AA] += QualityUtils.qualToProbLog10(qual);
|
||||
|
|
@ -362,7 +368,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
alleles.add( FAKE_REF_ALLELE );
|
||||
alleles.add( FAKE_ALT_ALLELE );
|
||||
final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
return ( vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ) );
|
||||
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
|
||||
|
||||
return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -407,7 +415,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList );
|
||||
|
||||
// subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
|
||||
final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
|
||||
final ArrayList<Haplotype> bestHaplotypes = haplotypes;// ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
|
||||
|
||||
for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
|
||||
( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
|
||||
|
|
@ -488,7 +496,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private void finalizeActiveRegion( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getExtendedLoc() + " with " + activeRegion.size() + " reads:"); }
|
||||
if( DEBUG ) { System.out.println("\nAssembling " + activeRegion.getLocation() + " with " + activeRegion.size() + " reads: (with overlap region = " + activeRegion.getExtendedLoc() + ")"); }
|
||||
final ArrayList<GATKSAMRecord> finalizedReadList = new ArrayList<GATKSAMRecord>();
|
||||
final FragmentCollection<GATKSAMRecord> fragmentCollection = FragmentUtils.create( ReadUtils.sortReadsByCoordinate(activeRegion.getReads()) );
|
||||
activeRegion.clearReads();
|
||||
|
|
@ -517,7 +525,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
|
|||
private List<GATKSAMRecord> filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) {
|
||||
final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>();
|
||||
for( final GATKSAMRecord rec : activeRegion.getReads() ) {
|
||||
if( rec.getReadLength() < 24 || rec.getMappingQuality() <= 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
|
||||
if( rec.getReadLength() < 24 || rec.getMappingQuality() < 20 || BadMateFilter.hasBadMate(rec) || (keepRG != null && !rec.getReadGroup().getId().equals(keepRG)) ) {
|
||||
readsToRemove.add(rec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
|
@ -80,6 +82,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@Reference(window=@Window(start=-HaplotypeResolver.ACTIVE_WINDOW,stop= HaplotypeResolver.ACTIVE_WINDOW))
|
||||
public class HaplotypeResolver extends RodWalker<Integer, Integer> {
|
||||
|
||||
|
|
|
|||
|
|
@ -103,7 +103,7 @@ public class LikelihoodCalculationEngine {
|
|||
readQuals[kkk] = ( readQuals[kkk] > (byte) read.getMappingQuality() ? (byte) read.getMappingQuality() : readQuals[kkk] ); // cap base quality by mapping quality
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated
|
||||
//readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated
|
||||
readQuals[kkk] = ( readQuals[kkk] < (byte) 17 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
|
||||
readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
|
||||
}
|
||||
|
||||
for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
|
||||
|
|
@ -311,7 +311,7 @@ public class LikelihoodCalculationEngine {
|
|||
int hap1 = 0;
|
||||
int hap2 = 0;
|
||||
//double bestElement = Double.NEGATIVE_INFINITY;
|
||||
final int maxChosenHaplotypes = Math.min( 8, sampleKeySet.size() * 2 + 1 );
|
||||
final int maxChosenHaplotypes = Math.min( 9, sampleKeySet.size() * 2 + 1 );
|
||||
while( bestHaplotypesIndexList.size() < maxChosenHaplotypes ) {
|
||||
double maxElement = Double.NEGATIVE_INFINITY;
|
||||
for( int iii = 0; iii < numHaplotypes; iii++ ) {
|
||||
|
|
|
|||
|
|
@ -353,6 +353,16 @@ public class GenotypingEngineUnitTest extends BaseTest {
|
|||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
|
||||
// deletion + insertion (abutting)
|
||||
thisVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","A").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1702, 1702).alleles("T","GCGCGC").make();
|
||||
truthVC = new VariantContextBuilder().loc("2", 1701, 1702).alleles("AT","AGCGCGC").source("merged").make();
|
||||
mergedVC = GenotypingEngine.createMergedVariantContext(thisVC, nextVC, ref, refLoc);
|
||||
logger.warn(truthVC + " == " + mergedVC);
|
||||
Assert.assertTrue(truthVC.hasSameAllelesAs(mergedVC));
|
||||
Assert.assertEquals(truthVC.getStart(), mergedVC.getStart());
|
||||
Assert.assertEquals(truthVC.getEnd(), mergedVC.getEnd());
|
||||
|
||||
// complex + complex
|
||||
thisVC = new VariantContextBuilder().loc("2", 1703, 1704).alleles("TC","AAA").make();
|
||||
nextVC = new VariantContextBuilder().loc("2", 1706, 1707).alleles("GG","AC").make();
|
||||
|
|
|
|||
|
|
@ -20,20 +20,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSample() {
|
||||
HCTest(CEUTRIO_BAM, "", "7b4e76934e0c911220b4e7da8776ab2b");
|
||||
HCTest(CEUTRIO_BAM, "", "eff4c820226abafcaa058c66585198a7");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerSingleSample() {
|
||||
HCTest(NA12878_BAM, "", "fcf0cea98a571d5e2d1dfa8b5edc599d");
|
||||
HCTest(NA12878_BAM, "", "2b40b314e6e63ae165186b55b14eee41");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
// TODO -- Ryan, do you know why the md5s changed just for the rank sum tests?
|
||||
final String RyansMd5 = "ff370c42c8b09a29f1aeff5ac57c7ea6";
|
||||
final String EricsMd5 = "d8317f4589e8e0c48bcd087cdb75ce88";
|
||||
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", EricsMd5);
|
||||
HCTest(CEUTRIO_BAM, "-gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "553870cc4d7e66f30862f8ae5dee01ff");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -44,8 +41,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleComplex() {
|
||||
HCTestComplexVariants(CEUTRIO_BAM, "", "6f9fda3ea82c5696bed1d48ee90cd76b");
|
||||
HCTestComplexVariants(CEUTRIO_BAM, "", "0936c41e8f006174f7cf27d97235133e");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,11 +29,13 @@ import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
|||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
|
@ -46,6 +48,7 @@ import java.util.Iterator;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class AlignmentValidation extends ReadWalker<Integer,Integer> {
|
||||
/**
|
||||
* The supporting BWT index generated using BWT.
|
||||
|
|
|
|||
|
|
@ -34,11 +34,13 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
|||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -50,6 +52,7 @@ import java.io.File;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@WalkerName("Align")
|
||||
public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
||||
@Argument(fullName="target_reference",shortName="target_ref",doc="The reference to which reads in the source file should be aligned. Alongside this reference should sit index files " +
|
||||
|
|
|
|||
|
|
@ -30,9 +30,11 @@ import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
|||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -48,6 +50,7 @@ import java.util.TreeMap;
|
|||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class CountBestAlignments extends ReadWalker<Integer,Integer> {
|
||||
/**
|
||||
* The supporting BWT index generated using BWT.
|
||||
|
|
|
|||
|
|
@ -131,6 +131,12 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
// can't close tribble index when writing
|
||||
if ( message.indexOf("Unable to close index for") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
||||
|
||||
// disk is full
|
||||
if ( message.indexOf("No space left on device") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getMessage()));
|
||||
if ( t.getCause().getMessage().indexOf("No space left on device") != -1 )
|
||||
exitSystemWithUserError(new UserException(t.getCause().getMessage()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -3,10 +3,12 @@ package org.broadinstitute.sting.gatk.examples;
|
|||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
|
|
@ -17,8 +19,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Computes the coverage per sample.
|
||||
* Computes the coverage per sample for every position (use with -L argument!).
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class CoverageBySample extends LocusWalker<Integer, Integer> {
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.examples;
|
|||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -35,6 +36,7 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
|||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -46,6 +48,7 @@ import java.io.PrintStream;
|
|||
*
|
||||
* @author aaron
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class GATKPaperGenotyper extends LocusWalker<Integer,Long> implements TreeReducible<Long> {
|
||||
// the possible diploid genotype strings
|
||||
private static enum GENOTYPE { AA, AC, AG, AT, CC, CG, CT, GG, GT, TT }
|
||||
|
|
|
|||
|
|
@ -279,7 +279,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
*/
|
||||
private void lazyLoadNextAlignmentContext() {
|
||||
while (nextAlignmentContext == null && readStates.hasNext()) {
|
||||
// this call will set hasExtendedEvents to true if it picks up a read with indel right before the current position on the ref:
|
||||
readStates.collectPendingReads();
|
||||
|
||||
final GenomeLoc location = getLocation();
|
||||
|
|
@ -378,7 +377,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
CigarOperator op = state.stepForwardOnGenome();
|
||||
if (op == null) {
|
||||
// we discard the read only when we are past its end AND indel at the end of the read (if any) was
|
||||
// already processed. Keeping the read state that retunred null upon stepForwardOnGenome() is safe
|
||||
// already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
|
||||
// as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
|
||||
it.remove(); // we've stepped off the end of the object
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.Walker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
@ -69,8 +70,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
|
||||
final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
|
||||
if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
|
||||
final double isActiveProb = ( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 );
|
||||
profile.add(fakeLoc, isActiveProb);
|
||||
profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -86,8 +86,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
|
||||
final double isActiveProb = walkerActiveProb(walker, tracker, refContext, locus, location);
|
||||
profile.add(location, isActiveProb);
|
||||
profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
}
|
||||
|
||||
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
||||
|
|
@ -144,11 +143,11 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final double walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext locus, final GenomeLoc location) {
|
||||
if ( walker.hasPresetActiveRegions() ) {
|
||||
return walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0;
|
||||
return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
} else {
|
||||
return walker.isActive( tracker, refContext, locus );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
|
@ -72,7 +73,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
|
|||
}
|
||||
|
||||
// Determine probability of active status over the AlignmentContext
|
||||
public abstract double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||
public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||
|
||||
// Map over the ActiveRegion
|
||||
public abstract MapType map(final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker);
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
|||
import net.sf.picard.util.PeekableIterator;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -36,6 +37,7 @@ import org.broadinstitute.sting.utils.SampleUtils;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
|
||||
|
|
@ -80,6 +82,7 @@ import java.util.*;
|
|||
* @author Mauricio Carneiro, Roger Zurawicki
|
||||
* @since 5/8/12
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@By(value = DataSource.READS)
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.diagnostics.targets;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -33,9 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@ActiveRegionExtension(extension = 0, maxRegion = 50000)
|
||||
public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
||||
|
|
@ -44,13 +48,13 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
|
|||
|
||||
@Override
|
||||
// Look to see if the region has sufficient coverage
|
||||
public double isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
|
||||
int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
|
||||
|
||||
// note the linear probability scale
|
||||
int coverageThreshold = 20;
|
||||
return Math.min((double) depth / coverageThreshold, 1);
|
||||
return new ActivityProfileResult(Math.min((double) depth / coverageThreshold, 1));
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,17 +26,20 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RefWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Calculates basic statistics about the reference sequence itself
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class FastaStats extends RefWalker<Byte, FastaStats.FastaStatistics> {
|
||||
@Output PrintStream out;
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -70,6 +72,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
|
||||
@Output
|
||||
public PrintStream out;
|
||||
|
|
|
|||
|
|
@ -1596,7 +1596,17 @@ public class MathUtils {
|
|||
result += v1[k].doubleValue() * v2[k].doubleValue();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static double dotProduct(double[] v1, double[] v2) {
|
||||
if (v1.length != v2.length)
|
||||
throw new UserException("BUG: vectors v1, v2 of different size in vectorSum()");
|
||||
|
||||
double result = 0.0;
|
||||
for (int k = 0; k < v1.length; k++)
|
||||
result += v1[k] * v2[k];
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static double[] vectorLog10(double v1[]) {
|
||||
|
|
|
|||
|
|
@ -31,7 +31,6 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -46,15 +45,15 @@ public class ActivityProfile {
|
|||
final GenomeLocParser parser;
|
||||
final boolean presetRegions;
|
||||
GenomeLoc regionStartLoc = null;
|
||||
final List<Double> isActiveList;
|
||||
final List<ActivityProfileResult> isActiveList;
|
||||
private GenomeLoc lastLoc = null;
|
||||
private static final int FILTER_SIZE = 65;
|
||||
private static final Double[] GaussianKernel;
|
||||
private static final int FILTER_SIZE = 80;
|
||||
private static final double[] GaussianKernel;
|
||||
|
||||
static {
|
||||
GaussianKernel = new Double[2*FILTER_SIZE + 1];
|
||||
GaussianKernel = new double[2*FILTER_SIZE + 1];
|
||||
for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) {
|
||||
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 40.0, iii);
|
||||
GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -63,22 +62,22 @@ public class ActivityProfile {
|
|||
// todo -- add unit tests
|
||||
// TODO -- own preset regions
|
||||
public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) {
|
||||
this(parser, presetRegions, new ArrayList<Double>(), null);
|
||||
this(parser, presetRegions, new ArrayList<ActivityProfileResult>(), null);
|
||||
}
|
||||
|
||||
protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List<Double> isActiveList, final GenomeLoc regionStartLoc) {
|
||||
protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List<ActivityProfileResult> isActiveList, final GenomeLoc regionStartLoc) {
|
||||
this.parser = parser;
|
||||
this.presetRegions = presetRegions;
|
||||
this.isActiveList = isActiveList;
|
||||
this.regionStartLoc = regionStartLoc;
|
||||
}
|
||||
|
||||
public void add(final GenomeLoc loc, final double score) {
|
||||
public void add(final GenomeLoc loc, final ActivityProfileResult result) {
|
||||
if ( loc.size() != 1 )
|
||||
throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" );
|
||||
if ( lastLoc != null && loc.getStart() != lastLoc.getStop() + 1 )
|
||||
throw new ReviewedStingException("Bad add call to ActivityProfile: lastLoc added " + lastLoc + " and next is " + loc);
|
||||
isActiveList.add(score);
|
||||
isActiveList.add(result);
|
||||
if( regionStartLoc == null ) {
|
||||
regionStartLoc = loc;
|
||||
}
|
||||
|
|
@ -93,22 +92,43 @@ public class ActivityProfile {
|
|||
* @return a new ActivityProfile that's the band-pass filtered version of this profile
|
||||
*/
|
||||
public ActivityProfile bandPassFilter() {
|
||||
final Double[] activeProbArray = isActiveList.toArray(new Double[isActiveList.size()]);
|
||||
final Double[] filteredProbArray = new Double[activeProbArray.length];
|
||||
final double[] activeProbArray = new double[isActiveList.size()];
|
||||
int iii = 0;
|
||||
for( final ActivityProfileResult result : isActiveList ) {
|
||||
activeProbArray[iii++] = result.isActiveProb;
|
||||
}
|
||||
iii = 0;
|
||||
for( final ActivityProfileResult result : isActiveList ) {
|
||||
if( result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups
|
||||
final int numHQClips = result.resultValue.intValue();
|
||||
for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) {
|
||||
activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]);
|
||||
}
|
||||
}
|
||||
iii++;
|
||||
}
|
||||
final double[] filteredProbArray = new double[activeProbArray.length];
|
||||
if( !presetRegions ) {
|
||||
for( int iii = 0; iii < activeProbArray.length; iii++ ) {
|
||||
final Double[] kernel = (Double[]) ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
|
||||
final Double[] activeProbSubArray = (Double[]) ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
|
||||
for( iii = 0; iii < activeProbArray.length; iii++ ) {
|
||||
final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
|
||||
final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
|
||||
filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel);
|
||||
}
|
||||
}
|
||||
return new ActivityProfile(parser, presetRegions, Arrays.asList(filteredProbArray), regionStartLoc);
|
||||
iii = 0;
|
||||
for( final double prob : filteredProbArray ) {
|
||||
final ActivityProfileResult result = isActiveList.get(iii++);
|
||||
result.isActiveProb = prob;
|
||||
result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE;
|
||||
result.resultValue = null;
|
||||
}
|
||||
return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Partition this profile into active regions
|
||||
* @param activeRegionExtension
|
||||
* @return
|
||||
* @param activeRegionExtension the amount of margin overlap in the active region
|
||||
* @return the list of active regions
|
||||
*/
|
||||
public List<ActiveRegion> createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) {
|
||||
final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author
|
||||
|
|
@ -119,14 +139,14 @@ public class ActivityProfile {
|
|||
return Collections.emptyList();
|
||||
} else if( isActiveList.size() == 1 ) {
|
||||
// there's a single element, it's either active or inactive
|
||||
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
|
||||
boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
|
||||
returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize));
|
||||
} else {
|
||||
// there are 2+ elements, divide these up into regions
|
||||
boolean isActive = isActiveList.get(0) > ACTIVE_PROB_THRESHOLD;
|
||||
boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD;
|
||||
int curStart = 0;
|
||||
for(int iii = 1; iii < isActiveList.size(); iii++ ) {
|
||||
final boolean thisStatus = isActiveList.get(iii) > ACTIVE_PROB_THRESHOLD;
|
||||
final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD;
|
||||
if( isActive != thisStatus ) {
|
||||
returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize));
|
||||
isActive = thisStatus;
|
||||
|
|
@ -143,7 +163,7 @@ public class ActivityProfile {
|
|||
* @param isActive should the region be active?
|
||||
* @param curStart offset (0-based) from the start of this region
|
||||
* @param curEnd offset (0-based) from the start of this region
|
||||
* @param activeRegionExtension
|
||||
* @param activeRegionExtension the amount of margin overlap in the active region
|
||||
* @return a fully initialized ActiveRegion with the above properties
|
||||
*/
|
||||
private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) {
|
||||
|
|
@ -160,8 +180,8 @@ public class ActivityProfile {
|
|||
int cutPoint = -1;
|
||||
|
||||
final int size = curEnd - curStart + 1;
|
||||
for( int iii = curStart + (int)(size*0.25); iii < curEnd - (int)(size*0.25); iii++ ) {
|
||||
if( isActiveList.get(iii) < minProb ) { minProb = isActiveList.get(iii); cutPoint = iii; }
|
||||
for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) {
|
||||
if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; }
|
||||
}
|
||||
final List<ActiveRegion> leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
|
||||
final List<ActiveRegion> rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
|
||||
|
|
|
|||
|
|
@ -0,0 +1,31 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 7/27/12
|
||||
*/
|
||||
|
||||
public class ActivityProfileResult {
|
||||
public double isActiveProb;
|
||||
public ActivityProfileResultState resultState;
|
||||
public Number resultValue;
|
||||
|
||||
public enum ActivityProfileResultState {
|
||||
NONE,
|
||||
HIGH_QUALITY_SOFT_CLIPS
|
||||
}
|
||||
|
||||
public ActivityProfileResult( final double isActiveProb ) {
|
||||
this.isActiveProb = isActiveProb;
|
||||
this.resultState = ActivityProfileResultState.NONE;
|
||||
this.resultValue = null;
|
||||
}
|
||||
|
||||
public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) {
|
||||
this.isActiveProb = isActiveProb;
|
||||
this.resultState = resultState;
|
||||
this.resultValue = resultValue;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -405,6 +406,40 @@ public class AlignmentUtils {
|
|||
return alignment;
|
||||
}
|
||||
|
||||
public static int calcNumHighQualitySoftClips( final GATKSAMRecord read, final byte qualThreshold ) {
|
||||
|
||||
int numHQSoftClips = 0;
|
||||
int alignPos = 0;
|
||||
final Cigar cigar = read.getCigar();
|
||||
final byte[] qual = read.getBaseQualities( EventType.BASE_SUBSTITUTION );
|
||||
|
||||
for( int iii = 0; iii < cigar.numCigarElements(); iii++ ) {
|
||||
|
||||
final CigarElement ce = cigar.getCigarElement(iii);
|
||||
final int elementLength = ce.getLength();
|
||||
|
||||
switch( ce.getOperator() ) {
|
||||
case S:
|
||||
for( int jjj = 0; jjj < elementLength; jjj++ ) {
|
||||
if( qual[alignPos++] > qualThreshold ) { numHQSoftClips++; }
|
||||
}
|
||||
break;
|
||||
case M:
|
||||
case I:
|
||||
alignPos += elementLength;
|
||||
break;
|
||||
case H:
|
||||
case P:
|
||||
case D:
|
||||
case N:
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unsupported cigar operator: " + ce.getOperator());
|
||||
}
|
||||
}
|
||||
return numHQSoftClips;
|
||||
}
|
||||
|
||||
public static int calcAlignmentByteArrayOffset(final Cigar cigar, final PileupElement pileupElement, final int alignmentStart, final int refLocus) {
|
||||
return calcAlignmentByteArrayOffset( cigar, pileupElement.getOffset(), pileupElement.isInsertionAtBeginningOfRead(), pileupElement.isDeletion(), alignmentStart, refLocus );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -123,12 +123,12 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
for ( int i = 0; i < cfg.probs.size(); i++ ) {
|
||||
double p = cfg.probs.get(i);
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i);
|
||||
profile.add(loc, p);
|
||||
profile.add(loc, new ActivityProfileResult(p));
|
||||
}
|
||||
Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));
|
||||
|
||||
Assert.assertEquals(profile.size(), cfg.probs.size());
|
||||
Assert.assertEquals(profile.isActiveList, cfg.probs);
|
||||
assertProbsAreEqual(profile.isActiveList, cfg.probs);
|
||||
|
||||
assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions);
|
||||
}
|
||||
|
|
@ -140,5 +140,12 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
private void assertProbsAreEqual(List<ActivityProfileResult> actual, List<Double> expected) {
|
||||
Assert.assertEquals(actual.size(), expected.size());
|
||||
for ( int i = 0; i < actual.size(); i++ ) {
|
||||
Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
// todo -- test extensions
|
||||
}
|
||||
Loading…
Reference in New Issue