Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e94fcf537b
|
|
@ -138,7 +138,7 @@ public class AlignmentContext implements HasGenomeLocation {
|
|||
* @return
|
||||
*/
|
||||
public boolean hasReads() {
|
||||
return basePileup != null && basePileup.size() > 0 ;
|
||||
return basePileup != null && basePileup.getNumberOfElements() > 0 ;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -146,7 +146,7 @@ public class AlignmentContext implements HasGenomeLocation {
|
|||
* @return
|
||||
*/
|
||||
public int size() {
|
||||
return basePileup.size();
|
||||
return basePileup.getNumberOfElements();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ public class AlignmentContextUtils {
|
|||
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSample(sample);
|
||||
|
||||
// Don't add empty pileups to the split context.
|
||||
if(pileupBySample.size() == 0)
|
||||
if(pileupBySample.getNumberOfElements() == 0)
|
||||
continue;
|
||||
|
||||
if(sample != null)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
*/
|
||||
@By(DataSource.READS)
|
||||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
|
|
|
|||
|
|
@ -34,6 +34,12 @@ public enum PartitionType {
|
|||
*/
|
||||
NONE,
|
||||
|
||||
/**
|
||||
* The walker inputs can be chunked down to individual
|
||||
* reads.
|
||||
*/
|
||||
READ,
|
||||
|
||||
/**
|
||||
* The walker inputs can be chunked down to the
|
||||
* per-locus level.
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@PartitionBy(PartitionType.READ)
|
||||
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
public boolean requiresOrderedReads() { return false; }
|
||||
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
|
|||
continue;
|
||||
}
|
||||
// todo -- actually care about indel length from the pileup (agnostic at the moment)
|
||||
int refCount = indelPileup.size();
|
||||
int refCount = indelPileup.getNumberOfElements();
|
||||
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
|
||||
|
||||
if ( refCount + altCount == 0 ) {
|
||||
|
|
|
|||
|
|
@ -1,32 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Abstract base class for all annotations that are normalized by depth
|
||||
*/
|
||||
public abstract class AnnotationByDepth extends InfoFieldAnnotation {
|
||||
|
||||
|
||||
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, AlignmentContext> stratifiedContexts) {
|
||||
int depth = 0;
|
||||
for ( Map.Entry<String, Genotype> genotype : genotypes.entrySet() ) {
|
||||
|
||||
// we care only about variant calls
|
||||
if ( genotype.getValue().isHomRef() )
|
||||
continue;
|
||||
|
||||
AlignmentContext context = stratifiedContexts.get(genotype.getKey());
|
||||
if ( context != null )
|
||||
depth += context.size();
|
||||
}
|
||||
|
||||
return depth;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -41,7 +41,7 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
|
|||
|
||||
int depth = 0;
|
||||
for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() )
|
||||
depth += sample.getValue().size();
|
||||
depth += sample.getValue().getBasePileup().depthOfCoverage();
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%d", depth));
|
||||
return map;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
|
|
@ -21,7 +21,7 @@ import java.util.Map;
|
|||
*
|
||||
* Low scores are indicative of false positive calls and artifacts.
|
||||
*/
|
||||
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
|
||||
public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
|
|
@ -43,14 +43,13 @@ public class QualByDepth extends AnnotationByDepth implements StandardAnnotation
|
|||
if ( context == null )
|
||||
continue;
|
||||
|
||||
depth += context.size();
|
||||
depth += context.getBasePileup().depthOfCoverage();
|
||||
}
|
||||
|
||||
if ( depth == 0 )
|
||||
return null;
|
||||
|
||||
int qDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
|
||||
double QD = 10.0 * vc.getNegLog10PError() / (double)qDepth;
|
||||
double QD = 10.0 * vc.getNegLog10PError() / (double)depth;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", QD));
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
|
|||
alleleCounts.put(allele.getBases()[0], 0);
|
||||
|
||||
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
|
||||
int totalDepth = pileup.size();
|
||||
int totalDepth = pileup.getNumberOfElements();
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
|
||||
|
|
@ -119,7 +119,7 @@ public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
|
|||
ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
|
||||
if ( pileup == null )
|
||||
return null;
|
||||
int totalDepth = pileup.size();
|
||||
int totalDepth = pileup.getNumberOfElements();
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
|
||||
|
|
|
|||
|
|
@ -1,59 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* SB annotation value by depth of alt containing samples
|
||||
*/
|
||||
public class SBByDepth extends AnnotationByDepth {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
|
||||
return null;
|
||||
|
||||
double sBias = vc.getAttributeAsDouble(VCFConstants.STRAND_BIAS_KEY, -1);
|
||||
|
||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() == 0 )
|
||||
return null;
|
||||
|
||||
int sDepth = annotationByVariantDepth(genotypes, stratifiedContexts);
|
||||
if ( sDepth == 0 )
|
||||
return null;
|
||||
|
||||
|
||||
|
||||
double SbyD = (-sBias / (double)sDepth);
|
||||
if (SbyD > 0)
|
||||
SbyD = Math.log10(SbyD);
|
||||
else
|
||||
SbyD = -1000;
|
||||
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", SbyD));
|
||||
return map;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() { return Arrays.asList("SBD"); }
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Strand Bias by Depth")); }
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -43,7 +43,7 @@ public class SpanningDeletions extends InfoFieldAnnotation implements StandardAn
|
|||
|
||||
if (pileup != null) {
|
||||
deletions += pileup.getNumberOfDeletions();
|
||||
depth += pileup.size();
|
||||
depth += pileup.getNumberOfElements();
|
||||
}
|
||||
}
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
|
|
|
|||
|
|
@ -113,6 +113,7 @@ import java.util.*;
|
|||
// todo -- allow for user to set linear binning (default is logarithmic)
|
||||
// todo -- formatting --> do something special for end bins in getQuantile(int[] foo), this gets mushed into the end+-1 bins for now
|
||||
@By(DataSource.REFERENCE)
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partition,Map<String,int[]>>, CoveragePartitioner> implements TreeReducible<CoveragePartitioner> {
|
||||
@Output
|
||||
@Multiplex(value=DoCOutputMultiplexer.class,arguments={"partitionTypes","refSeqGeneList","omitDepthOutput","omitIntervals","omitSampleSummary","omitLocusTable"})
|
||||
|
|
|
|||
|
|
@ -278,10 +278,10 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
|||
|
||||
if ( elt.isReducedRead() ) {
|
||||
// reduced read representation
|
||||
byte qual = elt.getReducedQual();
|
||||
byte qual = elt.getQual();
|
||||
if ( BaseUtils.isRegularBase( elt.getBase() )) {
|
||||
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
|
||||
return elt.getReducedCount(); // we added nObs bases here
|
||||
add(obsBase, qual, (byte)0, (byte)0, elt.getRepresentativeCount()); // fast calculation of n identical likelihoods
|
||||
return elt.getRepresentativeCount(); // we added nObs bases here
|
||||
} else // odd bases or deletions => don't use them
|
||||
return 0;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -569,7 +569,7 @@ public class UnifiedGenotyperEngine {
|
|||
ReadBackedPileup pileup = rawContext.getBasePileup() .getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
|
||||
// don't call when there is no coverage
|
||||
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
||||
if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
||||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
|
|
@ -586,7 +586,7 @@ public class UnifiedGenotyperEngine {
|
|||
ReadBackedExtendedEventPileup pileup = rawPileup.getMappingFilteredPileup(UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
|
||||
// don't call when there is no coverage
|
||||
if ( pileup.size() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
||||
if ( pileup.getNumberOfElements() == 0 && UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES )
|
||||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
|
|
@ -602,7 +602,7 @@ public class UnifiedGenotyperEngine {
|
|||
for( final PileupElement p : rawContext.getBasePileup() ) {
|
||||
if( p.isDeletion() ) { numDeletions++; }
|
||||
}
|
||||
if( ((double) numDeletions) / ((double) rawContext.getBasePileup().size()) > UAC.MAX_DELETION_FRACTION ) {
|
||||
if( ((double) numDeletions) / ((double) rawContext.getBasePileup().getNumberOfElements()) > UAC.MAX_DELETION_FRACTION ) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -649,9 +649,9 @@ public class UnifiedGenotyperEngine {
|
|||
if (isCovered) {
|
||||
AlignmentContext context = contexts.get(sample);
|
||||
if (context.hasBasePileup())
|
||||
depth = context.getBasePileup().size();
|
||||
depth = context.getBasePileup().depthOfCoverage();
|
||||
else if (context.hasExtendedEventPileup())
|
||||
depth = context.getExtendedEventPileup().size();
|
||||
depth = context.getExtendedEventPileup().depthOfCoverage();
|
||||
}
|
||||
|
||||
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
|
||||
|
|
|
|||
|
|
@ -32,17 +32,11 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
|
|
@ -376,8 +370,8 @@ public class PairHMMIndelErrorModel {
|
|||
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
|
||||
|
||||
int numHaplotypes = haplotypeMap.size();
|
||||
final double readLikelihoods[][] = new double[pileup.size()][numHaplotypes];
|
||||
final int readCounts[] = new int[pileup.size()];
|
||||
final double readLikelihoods[][] = new double[pileup.getNumberOfElements()][numHaplotypes];
|
||||
final int readCounts[] = new int[pileup.getNumberOfElements()];
|
||||
int readIdx=0;
|
||||
|
||||
LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
|
||||
|
|
@ -403,8 +397,7 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
for (PileupElement p: pileup) {
|
||||
// > 1 when the read is a consensus read representing multiple independent observations
|
||||
final boolean isReduced = p.isReducedRead();
|
||||
readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
|
||||
readCounts[readIdx] = p.getRepresentativeCount();
|
||||
|
||||
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
|
||||
if (indelLikelihoodMap.containsKey(p)) {
|
||||
|
|
@ -607,7 +600,7 @@ public class PairHMMIndelErrorModel {
|
|||
|
||||
if (DEBUG) {
|
||||
System.out.println("\nLikelihood summary");
|
||||
for (readIdx=0; readIdx < pileup.size(); readIdx++) {
|
||||
for (readIdx=0; readIdx < pileup.getNumberOfElements(); readIdx++) {
|
||||
System.out.format("Read Index: %d ",readIdx);
|
||||
for (int i=0; i < readLikelihoods[readIdx].length; i++)
|
||||
System.out.format("L%d: %f ",i,readLikelihoods[readIdx][i]);
|
||||
|
|
|
|||
|
|
@ -228,7 +228,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
|||
// make sure we're supposed to look for high entropy
|
||||
if ( mismatchThreshold > 0.0 &&
|
||||
mismatchThreshold <= 1.0 &&
|
||||
pileup.size() >= minReadsAtLocus &&
|
||||
pileup.getNumberOfElements() >= minReadsAtLocus &&
|
||||
(double)mismatchQualities / (double)totalQualities >= mismatchThreshold )
|
||||
hasPointEvent = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -258,10 +258,10 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
|
|||
|
||||
int numReads = 0;
|
||||
if (context.hasBasePileup()) {
|
||||
numReads = context.getBasePileup().size();
|
||||
numReads = context.getBasePileup().getNumberOfElements();
|
||||
}
|
||||
else if (context.hasExtendedEventPileup()) {
|
||||
numReads = context.getExtendedEventPileup().size();
|
||||
numReads = context.getExtendedEventPileup().getNumberOfElements();
|
||||
}
|
||||
PhasingStats addInPhaseStats = new PhasingStats(numReads, 1);
|
||||
phaseStats.addIn(addInPhaseStats);
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
|||
}
|
||||
}
|
||||
|
||||
return pileup.size();
|
||||
return pileup.getNumberOfElements();
|
||||
}
|
||||
|
||||
private static String maybeSorted( final String x, boolean sortMe )
|
||||
|
|
@ -94,7 +94,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
|||
|
||||
public String pileupDiff(final ReadBackedPileup a, final SAMPileupFeature b, boolean orderDependent)
|
||||
{
|
||||
if ( a.size() != b.size() )
|
||||
if ( a.getNumberOfElements() != b.size() )
|
||||
return "Sizes not equal";
|
||||
GenomeLoc featureLocation = getToolkit().getGenomeLocParser().createGenomeLoc(b.getChr(),b.getStart(),b.getEnd());
|
||||
if ( a.getLocation().compareTo(featureLocation) != 0 )
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ import java.util.*;
|
|||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A table delimited file containing the values of the requested fields in the VCF file
|
||||
* A tab-delimited file containing the values of the requested fields in the VCF file
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
|
|
@ -106,7 +106,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
|||
|
||||
/**
|
||||
* By default this tool only emits values for fields where the FILTER field is either PASS or . (unfiltered).
|
||||
* Throwing this flag will cause $WalkerName to emit values regardless of the FILTER field value.
|
||||
* Throwing this flag will cause VariantsToTable to emit values regardless of the FILTER field value.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName="showFiltered", shortName="raw", doc="If provided, field values from filtered records will be included in the output", required=false)
|
||||
|
|
|
|||
|
|
@ -5,6 +5,10 @@ import com.google.java.contract.Requires;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -174,6 +178,8 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
return new GenomeLoc[] { new GenomeLoc(getContig(),contigIndex,getStart(),splitPoint-1), new GenomeLoc(getContig(),contigIndex,splitPoint,getStop()) };
|
||||
}
|
||||
|
||||
public GenomeLoc union( GenomeLoc that ) { return merge(that); }
|
||||
|
||||
@Requires("that != null")
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc intersect( GenomeLoc that ) throws ReviewedStingException {
|
||||
|
|
@ -192,6 +198,79 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
Math.min( getStop(), that.getStop()) );
|
||||
}
|
||||
|
||||
@Requires("that != null")
|
||||
public final List<GenomeLoc> subtract( final GenomeLoc that ) {
|
||||
if(GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
|
||||
if(! GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that))
|
||||
throw new ReviewedStingException("Tried to intersect a mapped and an unmapped genome loc");
|
||||
return Arrays.asList(UNMAPPED);
|
||||
}
|
||||
|
||||
if (!(this.overlapsP(that))) {
|
||||
throw new ReviewedStingException("GenomeLoc::minus(): The two genome loc's need to overlap");
|
||||
}
|
||||
|
||||
if (equals(that)) {
|
||||
return Collections.emptyList();
|
||||
} else if (containsP(that)) {
|
||||
List<GenomeLoc> l = new ArrayList<GenomeLoc>(2);
|
||||
|
||||
/**
|
||||
* we have to create two new region, one for the before part, one for the after
|
||||
* The old region:
|
||||
* |----------------- old region (g) -------------|
|
||||
* |----- to delete (e) ------|
|
||||
*
|
||||
* product (two new regions):
|
||||
* |------| + |--------|
|
||||
*
|
||||
*/
|
||||
int afterStop = this.getStop(), afterStart = that.getStop() + 1;
|
||||
int beforeStop = that.getStart() - 1, beforeStart = this.getStart();
|
||||
if (afterStop - afterStart >= 0) {
|
||||
GenomeLoc after = new GenomeLoc(this.getContig(), getContigIndex(), afterStart, afterStop);
|
||||
l.add(after);
|
||||
}
|
||||
if (beforeStop - beforeStart >= 0) {
|
||||
GenomeLoc before = new GenomeLoc(this.getContig(), getContigIndex(), beforeStart, beforeStop);
|
||||
l.add(before);
|
||||
}
|
||||
|
||||
return l;
|
||||
} else if (that.containsP(this)) {
|
||||
/**
|
||||
* e completely contains g, delete g, but keep looking, there may be more regions
|
||||
* i.e.:
|
||||
* |--------------------- e --------------------|
|
||||
* |--- g ---| |---- others ----|
|
||||
*/
|
||||
return Collections.emptyList(); // don't need to do anything
|
||||
} else {
|
||||
/**
|
||||
* otherwise e overlaps some part of g
|
||||
*
|
||||
* figure out which region occurs first on the genome. I.e., is it:
|
||||
* |------------- g ----------|
|
||||
* |------------- e ----------|
|
||||
*
|
||||
* or:
|
||||
* |------------- g ----------|
|
||||
* |------------ e -----------|
|
||||
*
|
||||
*/
|
||||
|
||||
GenomeLoc n;
|
||||
if (that.getStart() < this.getStart()) {
|
||||
n = new GenomeLoc(this.getContig(), getContigIndex(), that.getStop() + 1, this.getStop());
|
||||
} else {
|
||||
n = new GenomeLoc(this.getContig(), getContigIndex(), this.getStart(), that.getStart() - 1);
|
||||
}
|
||||
|
||||
// replace g with the new region
|
||||
return Arrays.asList(n);
|
||||
}
|
||||
}
|
||||
|
||||
@Requires("that != null")
|
||||
public final boolean containsP(GenomeLoc that) {
|
||||
return onSameContig(that) && getStart() <= that.getStart() && getStop() >= that.getStop();
|
||||
|
|
@ -203,19 +282,14 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
}
|
||||
|
||||
@Requires("that != null")
|
||||
public final int minus( final GenomeLoc that ) {
|
||||
@Ensures("result >= 0")
|
||||
public final int distance( final GenomeLoc that ) {
|
||||
if ( this.onSameContig(that) )
|
||||
return this.getStart() - that.getStart();
|
||||
return Math.abs(this.getStart() - that.getStart());
|
||||
else
|
||||
return Integer.MAX_VALUE;
|
||||
}
|
||||
|
||||
@Requires("that != null")
|
||||
@Ensures("result >= 0")
|
||||
public final int distance( final GenomeLoc that ) {
|
||||
return Math.abs(minus(that));
|
||||
}
|
||||
|
||||
@Requires({"left != null", "right != null"})
|
||||
public final boolean isBetween( final GenomeLoc left, final GenomeLoc right ) {
|
||||
return this.compareTo(left) > -1 && this.compareTo(right) < 1;
|
||||
|
|
|
|||
|
|
@ -215,7 +215,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
|
||||
if ( p.overlapsP(e) ) {
|
||||
toProcess.pop();
|
||||
for ( GenomeLoc newP : subtractRegion(p, e) )
|
||||
for ( GenomeLoc newP : p.subtract(e) )
|
||||
toProcess.push(newP);
|
||||
} else if ( p.compareContigs(e) < 0 ) {
|
||||
good.add(toProcess.pop()); // p is now good
|
||||
|
|
@ -236,69 +236,6 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
return createSetFromList(genomeLocParser,good);
|
||||
}
|
||||
|
||||
private static final List<GenomeLoc> EMPTY_LIST = new ArrayList<GenomeLoc>();
|
||||
private List<GenomeLoc> subtractRegion(GenomeLoc g, GenomeLoc e) {
|
||||
if (g.equals(e)) {
|
||||
return EMPTY_LIST;
|
||||
} else if (g.containsP(e)) {
|
||||
List<GenomeLoc> l = new ArrayList<GenomeLoc>();
|
||||
|
||||
/**
|
||||
* we have to create two new region, one for the before part, one for the after
|
||||
* The old region:
|
||||
* |----------------- old region (g) -------------|
|
||||
* |----- to delete (e) ------|
|
||||
*
|
||||
* product (two new regions):
|
||||
* |------| + |--------|
|
||||
*
|
||||
*/
|
||||
int afterStop = g.getStop(), afterStart = e.getStop() + 1;
|
||||
int beforeStop = e.getStart() - 1, beforeStart = g.getStart();
|
||||
if (afterStop - afterStart >= 0) {
|
||||
GenomeLoc after = genomeLocParser.createGenomeLoc(g.getContig(), afterStart, afterStop);
|
||||
l.add(after);
|
||||
}
|
||||
if (beforeStop - beforeStart >= 0) {
|
||||
GenomeLoc before = genomeLocParser.createGenomeLoc(g.getContig(), beforeStart, beforeStop);
|
||||
l.add(before);
|
||||
}
|
||||
|
||||
return l;
|
||||
} else if (e.containsP(g)) {
|
||||
/**
|
||||
* e completely contains g, delete g, but keep looking, there may be more regions
|
||||
* i.e.:
|
||||
* |--------------------- e --------------------|
|
||||
* |--- g ---| |---- others ----|
|
||||
*/
|
||||
return EMPTY_LIST; // don't need to do anything
|
||||
} else {
|
||||
/**
|
||||
* otherwise e overlaps some part of g
|
||||
*
|
||||
* figure out which region occurs first on the genome. I.e., is it:
|
||||
* |------------- g ----------|
|
||||
* |------------- e ----------|
|
||||
*
|
||||
* or:
|
||||
* |------------- g ----------|
|
||||
* |------------ e -----------|
|
||||
*
|
||||
*/
|
||||
|
||||
GenomeLoc n;
|
||||
if (e.getStart() < g.getStart()) {
|
||||
n = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop());
|
||||
} else {
|
||||
n = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1);
|
||||
}
|
||||
|
||||
// replace g with the new region
|
||||
return Arrays.asList(n);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* a simple removal of an interval contained in this list. The interval must be identical to one in the list (no partial locations or overlapping)
|
||||
|
|
|
|||
|
|
@ -408,12 +408,12 @@ public class MathUtils {
|
|||
return Math.sqrt(rms);
|
||||
}
|
||||
|
||||
public static double rms(Collection<Double> l) {
|
||||
public static double rms(Collection<Integer> l) {
|
||||
if (l.size() == 0)
|
||||
return 0.0;
|
||||
|
||||
double rms = 0.0;
|
||||
for (Double i : l)
|
||||
for (int i : l)
|
||||
rms += i*i;
|
||||
rms /= l.size();
|
||||
return Math.sqrt(rms);
|
||||
|
|
|
|||
|
|
@ -586,6 +586,12 @@ public class Utils {
|
|||
return rcbases;
|
||||
}
|
||||
|
||||
static public final <T> List<T> reverse(final List<T> l) {
|
||||
final List<T> newL = new ArrayList<T>(l);
|
||||
Collections.reverse(newL);
|
||||
return newL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverse an int array of bases
|
||||
*
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ public class FragmentUtils {
|
|||
}
|
||||
|
||||
public final static FragmentCollection<PileupElement> create(ReadBackedPileup rbp) {
|
||||
return create(rbp, rbp.size(), PileupElementGetter);
|
||||
return create(rbp, rbp.getNumberOfElements(), PileupElementGetter);
|
||||
}
|
||||
|
||||
public final static FragmentCollection<SAMRecord> create(List<SAMRecord> reads) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.interval;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.picard.util.Interval;
|
||||
import net.sf.picard.util.IntervalList;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
|
@ -8,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -221,6 +224,44 @@ public class IntervalUtils {
|
|||
return GenomeLocSortedSet.createSetFromList(parser,intervals);
|
||||
}
|
||||
|
||||
/**
|
||||
* computes whether the test interval list is equivalent to master. To be equivalent, test must
|
||||
* contain GenomeLocs covering every base in master, exactly once. Note that this algorithm
|
||||
* assumes that master genomelocs are all discontiguous (i.e., we don't have locs like 1-3 and 4-6 but
|
||||
* rather just 1-6). In order to use this algorithm with contiguous genomelocs first merge them. The algorithm
|
||||
* doesn't assume that test has discontinuous genomelocs.
|
||||
*
|
||||
* Returns a null string if there are no differences, otherwise returns a string describing the difference
|
||||
* (useful for UnitTests). Assumes both lists are sorted
|
||||
*/
|
||||
public static final String equateIntervals(List<GenomeLoc> masterArg, List<GenomeLoc> testArg) {
|
||||
LinkedList<GenomeLoc> master = new LinkedList<GenomeLoc>(masterArg);
|
||||
LinkedList<GenomeLoc> test = new LinkedList<GenomeLoc>(testArg);
|
||||
|
||||
while ( ! master.isEmpty() ) { // there's still unchecked bases in master
|
||||
final GenomeLoc masterHead = master.pop();
|
||||
final GenomeLoc testHead = test.pop();
|
||||
|
||||
if ( testHead.overlapsP(masterHead) ) {
|
||||
// remove the parts of test that overlap master, and push the remaining
|
||||
// parts onto master for further comparison.
|
||||
for ( final GenomeLoc masterPart : Utils.reverse(masterHead.subtract(testHead)) ) {
|
||||
master.push(masterPart);
|
||||
}
|
||||
} else {
|
||||
// testHead is incompatible with masterHead, so we must have extra bases in testHead
|
||||
// that aren't in master
|
||||
return "Incompatible locs detected masterHead=" + masterHead + ", testHead=" + testHead;
|
||||
}
|
||||
}
|
||||
|
||||
if ( test.isEmpty() ) // everything is equal
|
||||
return null; // no differences
|
||||
else
|
||||
return "Remaining elements found in test: first=" + test.peek();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Check if string argument was intented as a file
|
||||
* Accepted file extensions: .bed .list, .picard, .interval_list, .intervals.
|
||||
|
|
@ -384,6 +425,92 @@ public class IntervalUtils {
|
|||
return splitIntervalsToSubLists(locs, splitPoints);
|
||||
}
|
||||
|
||||
@Requires({"locs != null", "numParts > 0"})
|
||||
@Ensures("result != null")
|
||||
public static List<List<GenomeLoc>> splitLocusIntervals(List<GenomeLoc> locs, int numParts) {
|
||||
// the ideal size of each split
|
||||
final long bp = IntervalUtils.intervalSize(locs);
|
||||
final long idealSplitSize = Math.max((long)Math.floor(bp / (1.0*numParts)), 1);
|
||||
|
||||
// algorithm:
|
||||
// split = ()
|
||||
// set size = 0
|
||||
// pop the head H off locs.
|
||||
// If size + size(H) < splitSize:
|
||||
// add H to split, continue
|
||||
// If size + size(H) == splitSize:
|
||||
// done with split, put in splits, restart
|
||||
// if size + size(H) > splitSize:
|
||||
// cut H into two pieces, first of which has splitSize - size bp
|
||||
// push both pieces onto locs, continue
|
||||
// The last split is special -- when you have only one split left, it gets all of the remaining locs
|
||||
// to deal with rounding issues
|
||||
final List<List<GenomeLoc>> splits = new ArrayList<List<GenomeLoc>>(numParts);
|
||||
|
||||
LinkedList<GenomeLoc> locsLinkedList = new LinkedList<GenomeLoc>(locs);
|
||||
while ( ! locsLinkedList.isEmpty() ) {
|
||||
if ( splits.size() + 1 == numParts ) {
|
||||
// the last one gets all of the remaining parts
|
||||
splits.add(new ArrayList<GenomeLoc>(locsLinkedList));
|
||||
locsLinkedList.clear();
|
||||
} else {
|
||||
final SplitLocusRecursive one = splitLocusIntervals1(locsLinkedList, idealSplitSize);
|
||||
splits.add(one.split);
|
||||
locsLinkedList = one.remaining;
|
||||
}
|
||||
}
|
||||
|
||||
return splits;
|
||||
}
|
||||
|
||||
@Requires({"remaining != null", "!remaining.isEmpty()", "idealSplitSize > 0"})
|
||||
@Ensures({"result != null"})
|
||||
final static SplitLocusRecursive splitLocusIntervals1(LinkedList<GenomeLoc> remaining, long idealSplitSize) {
|
||||
final List<GenomeLoc> split = new ArrayList<GenomeLoc>();
|
||||
long size = 0;
|
||||
|
||||
while ( ! remaining.isEmpty() ) {
|
||||
GenomeLoc head = remaining.pop();
|
||||
final long newSize = size + head.size();
|
||||
|
||||
if ( newSize == idealSplitSize ) {
|
||||
split.add(head);
|
||||
break; // we are done
|
||||
} else if ( newSize > idealSplitSize ) {
|
||||
final long remainingBp = idealSplitSize - size;
|
||||
final long cutPoint = head.getStart() + remainingBp;
|
||||
GenomeLoc[] parts = head.split((int)cutPoint);
|
||||
remaining.push(parts[1]);
|
||||
remaining.push(parts[0]);
|
||||
// when we go around, head.size' = idealSplitSize - size
|
||||
// so newSize' = splitSize + head.size' = size + (idealSplitSize - size) = idealSplitSize
|
||||
} else {
|
||||
split.add(head);
|
||||
size = newSize;
|
||||
}
|
||||
}
|
||||
|
||||
return new SplitLocusRecursive(split, remaining);
|
||||
}
|
||||
|
||||
private final static class SplitLocusRecursive {
|
||||
final List<GenomeLoc> split;
|
||||
final LinkedList<GenomeLoc> remaining;
|
||||
|
||||
@Requires({"split != null", "remaining != null"})
|
||||
private SplitLocusRecursive(final List<GenomeLoc> split, final LinkedList<GenomeLoc> remaining) {
|
||||
this.split = split;
|
||||
this.remaining = remaining;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<GenomeLoc> flattenSplitIntervals(List<List<GenomeLoc>> splits) {
|
||||
final List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
|
||||
for ( final List<GenomeLoc> split : splits )
|
||||
locs.addAll(split);
|
||||
return locs;
|
||||
}
|
||||
|
||||
private static void addFixedSplit(List<Integer> splitPoints, List<GenomeLoc> locs, long locsSize, int startIndex, int stopIndex, int numParts) {
|
||||
if (numParts < 2)
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
protected final PileupElementTracker<PE> pileupElementTracker;
|
||||
|
||||
protected int size = 0; // cached value of the size of the pileup
|
||||
protected int abstractSize = -1; // cached value of the abstract size of the pileup
|
||||
protected int nDeletions = 0; // cached value of the number of deletions
|
||||
protected int nMQ0Reads = 0; // cached value of the number of MQ0 reads
|
||||
|
||||
|
|
@ -145,8 +146,16 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
}
|
||||
|
||||
protected void calculateAbstractSize() {
|
||||
abstractSize = 0;
|
||||
for ( PileupElement p : pileupElementTracker ) {
|
||||
abstractSize += p.getRepresentativeCount();
|
||||
}
|
||||
}
|
||||
|
||||
protected void addPileupToCumulativeStats(AbstractReadBackedPileup<RBP,PE> pileup) {
|
||||
size += pileup.size();
|
||||
size += pileup.getNumberOfElements();
|
||||
abstractSize += pileup.depthOfCoverage();
|
||||
nDeletions += pileup.getNumberOfDeletions();
|
||||
nMQ0Reads += pileup.getNumberOfMappingQualityZeroReads();
|
||||
}
|
||||
|
|
@ -574,7 +583,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public RBP getDownsampledPileup(int desiredCoverage) {
|
||||
if ( size() <= desiredCoverage )
|
||||
if ( getNumberOfElements() <= desiredCoverage )
|
||||
return (RBP)this;
|
||||
|
||||
// randomly choose numbers corresponding to positions in the reads list
|
||||
|
|
@ -727,13 +736,23 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
}
|
||||
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
* @return the number of physical elements in this pileup
|
||||
*/
|
||||
@Override
|
||||
public int size() {
|
||||
public int getNumberOfElements() {
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of abstract elements in this pileup
|
||||
*/
|
||||
@Override
|
||||
public int depthOfCoverage() {
|
||||
if ( abstractSize == -1 )
|
||||
calculateAbstractSize();
|
||||
return abstractSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if there are 0 elements in the pileup, false otherwise
|
||||
*/
|
||||
|
|
@ -806,7 +825,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public List<SAMRecord> getReads() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(size());
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>(getNumberOfElements());
|
||||
for ( PileupElement pile : this ) { reads.add(pile.getRead()); }
|
||||
return reads;
|
||||
}
|
||||
|
|
@ -817,7 +836,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public List<Integer> getOffsets() {
|
||||
List<Integer> offsets = new ArrayList<Integer>(size());
|
||||
List<Integer> offsets = new ArrayList<Integer>(getNumberOfElements());
|
||||
for ( PileupElement pile : this ) { offsets.add(pile.getOffset()); }
|
||||
return offsets;
|
||||
}
|
||||
|
|
@ -828,7 +847,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public byte[] getBases() {
|
||||
byte[] v = new byte[size()];
|
||||
byte[] v = new byte[getNumberOfElements()];
|
||||
int pos = 0;
|
||||
for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getBase(); }
|
||||
return v;
|
||||
|
|
@ -840,7 +859,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public byte[] getQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
byte[] v = new byte[getNumberOfElements()];
|
||||
int pos = 0;
|
||||
for ( PileupElement pile : pileupElementTracker ) { v[pos++] = pile.getQual(); }
|
||||
return v;
|
||||
|
|
@ -852,7 +871,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
|
|||
*/
|
||||
@Override
|
||||
public byte[] getMappingQuals() {
|
||||
byte[] v = new byte[size()];
|
||||
byte[] v = new byte[getNumberOfElements()];
|
||||
int pos = 0;
|
||||
for ( PileupElement pile : pileupElementTracker ) { v[pos++] = (byte)pile.getRead().getMappingQuality(); }
|
||||
return v;
|
||||
|
|
|
|||
|
|
@ -100,13 +100,8 @@ public class PileupElement implements Comparable<PileupElement> {
|
|||
return ((GATKSAMRecord)read).isReducedRead();
|
||||
}
|
||||
|
||||
public int getReducedCount() {
|
||||
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced count for non-reduced read " + getRead().getReadName());
|
||||
return ((GATKSAMRecord)read).getReducedCount(offset);
|
||||
public int getRepresentativeCount() {
|
||||
return isReducedRead() ? ((GATKSAMRecord)read).getReducedCount(offset) : 1;
|
||||
}
|
||||
|
||||
public byte getReducedQual() {
|
||||
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced qual for non-reduced read " + getRead().getReadName());
|
||||
return getQual();
|
||||
}
|
||||
}
|
||||
|
|
@ -155,7 +155,7 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
|
|||
/**
|
||||
* @return the number of elements in this pileup
|
||||
*/
|
||||
public int size();
|
||||
public int getNumberOfElements();
|
||||
|
||||
/**
|
||||
* @return the location of this pileup
|
||||
|
|
|
|||
|
|
@ -133,7 +133,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
|
|||
*/
|
||||
@Override
|
||||
public byte[] getEvents() {
|
||||
byte[] v = new byte[size()];
|
||||
byte[] v = new byte[getNumberOfElements()];
|
||||
int i = 0;
|
||||
for ( ExtendedEventPileupElement e : this.toExtendedIterable() ) {
|
||||
switch ( e.getType() ) {
|
||||
|
|
|
|||
|
|
@ -169,9 +169,14 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
|
|||
public int getNumberOfMappingQualityZeroReads();
|
||||
|
||||
/**
|
||||
* @return the number of elements in this pileup
|
||||
* @return the number of physical elements in this pileup (a reduced read is counted just once)
|
||||
*/
|
||||
public int size();
|
||||
public int getNumberOfElements();
|
||||
|
||||
/**
|
||||
* @return the number of abstract elements in this pileup (reduced reads are expanded to count all reads that they represent)
|
||||
*/
|
||||
public int depthOfCoverage();
|
||||
|
||||
/**
|
||||
* @return true if there are 0 elements in the pileup, false otherwise
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -43,6 +43,7 @@ import java.util.Map;
|
|||
*
|
||||
*/
|
||||
public class GATKSAMRecord extends BAMRecord {
|
||||
public static final String REDUCED_READ_QUALITY_TAG = "RR";
|
||||
// the SAMRecord data we're caching
|
||||
private String mReadString = null;
|
||||
private GATKSAMReadGroupRecord mReadGroup = null;
|
||||
|
|
@ -151,7 +152,7 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
|
||||
public byte[] getReducedReadCounts() {
|
||||
if ( ! retrievedReduceReadCounts ) {
|
||||
reducedReadCounts = getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
|
||||
reducedReadCounts = getByteArrayAttribute(REDUCED_READ_QUALITY_TAG);
|
||||
retrievedReduceReadCounts = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -51,8 +51,6 @@ public class ReadUtils {
|
|||
//
|
||||
// ----------------------------------------------------------------------------------------------------
|
||||
|
||||
public static final String REDUCED_READ_QUALITY_TAG = "RQ";
|
||||
|
||||
// ----------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// General utilities
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
|
|||
|
||||
ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup().getBaseFilteredPileup(10);
|
||||
Assert.assertEquals(pileup.getLocation().getStart(), 5, "Extended event pileup at wrong location");
|
||||
Assert.assertEquals(pileup.size(), 3, "Pileup size is incorrect");
|
||||
Assert.assertEquals(pileup.getNumberOfElements(), 3, "Pileup size is incorrect");
|
||||
|
||||
foundExtendedEventPileup = true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
|
||||
reducedRead.setReadBases(BASES.getBytes());
|
||||
reducedRead.setBaseQualityString(QUALS);
|
||||
reducedRead.setAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG, REDUCED_READ_COUNTS);
|
||||
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_QUALITY_TAG, REDUCED_READ_COUNTS);
|
||||
}
|
||||
|
||||
private void testReadBasesAndQuals(SAMRecord read, int expectedStart, int expectedStop) {
|
||||
|
|
@ -65,7 +65,7 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
Assert.assertFalse(readp.isReducedRead());
|
||||
|
||||
Assert.assertTrue(reducedreadp.isReducedRead());
|
||||
Assert.assertEquals(reducedreadp.getReducedCount(), REDUCED_READ_COUNTS[0]);
|
||||
Assert.assertEquals(reducedreadp.getReducedQual(), readp.getQual());
|
||||
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
|
||||
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.interval;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.util.IntervalUtil;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
|
|
@ -131,6 +132,135 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(totalSize, sumOfSplitSizes, "Split intervals don't contain the exact number of bases in the origianl intervals");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// splitLocusIntervals tests
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/** large scale tests for many intervals */
|
||||
private class SplitLocusIntervalsTest extends TestDataProvider {
|
||||
final List<GenomeLoc> originalIntervals;
|
||||
final public int parts;
|
||||
|
||||
private SplitLocusIntervalsTest(final String name, List<GenomeLoc> originalIntervals, final int parts) {
|
||||
super(SplitLocusIntervalsTest.class, name);
|
||||
this.parts = parts;
|
||||
this.originalIntervals = originalIntervals;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s parts=%d", super.toString(), parts);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "IntervalRepartitionTest")
|
||||
public Object[][] createIntervalRepartitionTest() {
|
||||
for ( int parts : Arrays.asList(1, 2, 3, 10, 13, 100, 151, 1000, 10000) ) {
|
||||
//for ( int parts : Arrays.asList(10) ) {
|
||||
new SplitLocusIntervalsTest("hg19RefLocs", hg19ReferenceLocs, parts);
|
||||
new SplitLocusIntervalsTest("hg19ExomeLocs", hg19exomeIntervals, parts);
|
||||
}
|
||||
|
||||
return SplitLocusIntervalsTest.getTests(SplitLocusIntervalsTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "IntervalRepartitionTest")
|
||||
public void testIntervalRepartition(SplitLocusIntervalsTest test) {
|
||||
List<List<GenomeLoc>> splitByLocus = IntervalUtils.splitLocusIntervals(test.originalIntervals, test.parts);
|
||||
Assert.assertEquals(splitByLocus.size(), test.parts, "SplitLocusIntervals failed to generate correct number of intervals");
|
||||
List<GenomeLoc> flat = IntervalUtils.flattenSplitIntervals(splitByLocus);
|
||||
|
||||
// test overall size
|
||||
final long originalSize = IntervalUtils.intervalSize(test.originalIntervals);
|
||||
final long flatSize = IntervalUtils.intervalSize(flat);
|
||||
Assert.assertEquals(flatSize, originalSize, "SplitLocusIntervals locs cover an incorrect number of bases");
|
||||
|
||||
// test size of each split
|
||||
final long ideal = (long)Math.floor(originalSize / (1.0 * test.parts));
|
||||
final long maxSize = ideal + (originalSize % test.parts) * test.parts; // no more than N * rounding error in size
|
||||
for ( final List<GenomeLoc> split : splitByLocus ) {
|
||||
final long splitSize = IntervalUtils.intervalSize(split);
|
||||
Assert.assertTrue(splitSize >= ideal && splitSize <= maxSize,
|
||||
String.format("SplitLocusIntervals interval (start=%s) has size %d outside of bounds ideal=%d, max=%d",
|
||||
split.get(0), splitSize, ideal, maxSize));
|
||||
}
|
||||
|
||||
// test that every base in original is covered once by a base in split by locus intervals
|
||||
String diff = IntervalUtils.equateIntervals(test.originalIntervals, flat);
|
||||
Assert.assertNull(diff, diff);
|
||||
}
|
||||
|
||||
/** small scale tests where the expected cuts are enumerated upfront for testing */
|
||||
private class SplitLocusIntervalsSmallTest extends TestDataProvider {
|
||||
final List<GenomeLoc> original;
|
||||
final public int parts;
|
||||
final public int expectedParts;
|
||||
final List<GenomeLoc> expected;
|
||||
|
||||
private SplitLocusIntervalsSmallTest(final String name, List<GenomeLoc> originalIntervals, final int parts, List<GenomeLoc> expected) {
|
||||
this(name, originalIntervals, parts, expected, parts);
|
||||
}
|
||||
|
||||
private SplitLocusIntervalsSmallTest(final String name, List<GenomeLoc> originalIntervals, final int parts, List<GenomeLoc> expected, int expectedParts) {
|
||||
super(SplitLocusIntervalsSmallTest.class, name);
|
||||
this.parts = parts;
|
||||
this.expectedParts = expectedParts;
|
||||
this.original = originalIntervals;
|
||||
this.expected = expected;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s parts=%d", super.toString(), parts);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "SplitLocusIntervalsSmallTest")
|
||||
public Object[][] createSplitLocusIntervalsSmallTest() {
|
||||
GenomeLoc bp01_10 = hg19GenomeLocParser.createGenomeLoc("1", 1, 10);
|
||||
|
||||
GenomeLoc bp1_5 = hg19GenomeLocParser.createGenomeLoc("1", 1, 5);
|
||||
GenomeLoc bp6_10 = hg19GenomeLocParser.createGenomeLoc("1", 6, 10);
|
||||
new SplitLocusIntervalsSmallTest("cut into two", Arrays.asList(bp01_10), 2, Arrays.asList(bp1_5, bp6_10));
|
||||
|
||||
GenomeLoc bp20_30 = hg19GenomeLocParser.createGenomeLoc("1", 20, 30);
|
||||
new SplitLocusIntervalsSmallTest("two in two", Arrays.asList(bp01_10, bp20_30), 2, Arrays.asList(bp01_10, bp20_30));
|
||||
|
||||
GenomeLoc bp1_7 = hg19GenomeLocParser.createGenomeLoc("1", 1, 7);
|
||||
GenomeLoc bp8_10 = hg19GenomeLocParser.createGenomeLoc("1", 8, 10);
|
||||
GenomeLoc bp20_23 = hg19GenomeLocParser.createGenomeLoc("1", 20, 23);
|
||||
GenomeLoc bp24_30 = hg19GenomeLocParser.createGenomeLoc("1", 24, 30);
|
||||
new SplitLocusIntervalsSmallTest("two in three", Arrays.asList(bp01_10, bp20_30), 3,
|
||||
Arrays.asList(bp1_7, bp8_10, bp20_23, bp24_30));
|
||||
|
||||
GenomeLoc bp1_2 = hg19GenomeLocParser.createGenomeLoc("1", 1, 2);
|
||||
GenomeLoc bp1_1 = hg19GenomeLocParser.createGenomeLoc("1", 1, 1);
|
||||
GenomeLoc bp2_2 = hg19GenomeLocParser.createGenomeLoc("1", 2, 2);
|
||||
new SplitLocusIntervalsSmallTest("too many pieces", Arrays.asList(bp1_2), 5, Arrays.asList(bp1_1, bp2_2), 2);
|
||||
|
||||
new SplitLocusIntervalsSmallTest("emptyList", Collections.<GenomeLoc>emptyList(), 5, Collections.<GenomeLoc>emptyList(), 0);
|
||||
|
||||
return SplitLocusIntervalsSmallTest.getTests(SplitLocusIntervalsSmallTest.class);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "SplitLocusIntervalsSmallTest")
|
||||
public void splitLocusIntervalsSmallTest(SplitLocusIntervalsSmallTest test) {
|
||||
List<List<GenomeLoc>> splitByLocus = IntervalUtils.splitLocusIntervals(test.original, test.parts);
|
||||
Assert.assertEquals(splitByLocus.size(), test.expectedParts, "SplitLocusIntervals failed to generate correct number of intervals");
|
||||
List<GenomeLoc> flat = IntervalUtils.flattenSplitIntervals(splitByLocus);
|
||||
|
||||
// test sizes
|
||||
final long originalSize = IntervalUtils.intervalSize(test.original);
|
||||
final long splitSize = IntervalUtils.intervalSize(flat);
|
||||
Assert.assertEquals(splitSize, originalSize, "SplitLocusIntervals locs cover an incorrect number of bases");
|
||||
|
||||
Assert.assertEquals(flat, test.expected, "SplitLocusIntervals locs not expected intervals");
|
||||
}
|
||||
|
||||
//
|
||||
// Misc. tests
|
||||
//
|
||||
|
||||
@Test(expectedExceptions=UserException.class)
|
||||
public void testMergeListsBySetOperatorNoOverlap() {
|
||||
// a couple of lists we'll use for the testing
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ public class ReadBackedPileupUnitTest {
|
|||
|
||||
ReadBackedPileup nullRgPileup = pileup.getPileupForReadGroup(null);
|
||||
List<SAMRecord> nullRgReads = nullRgPileup.getReads();
|
||||
Assert.assertEquals(nullRgPileup.size(), 3, "Wrong number of reads in null read group");
|
||||
Assert.assertEquals(nullRgPileup.getNumberOfElements(), 3, "Wrong number of reads in null read group");
|
||||
Assert.assertEquals(nullRgReads.get(0), read1, "Read " + read1.getReadName() + " should be in null rg but isn't");
|
||||
Assert.assertEquals(nullRgReads.get(1), read2, "Read " + read2.getReadName() + " should be in null rg but isn't");
|
||||
Assert.assertEquals(nullRgReads.get(2), read3, "Read " + read3.getReadName() + " should be in null rg but isn't");
|
||||
|
|
@ -187,7 +187,7 @@ public class ReadBackedPileupUnitTest {
|
|||
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
|
||||
|
||||
ReadBackedPileup sample2Pileup = pileup.getPileupForSample(sample2);
|
||||
Assert.assertEquals(sample2Pileup.size(),1,"Sample 2 pileup has wrong number of elements");
|
||||
Assert.assertEquals(sample2Pileup.getNumberOfElements(),1,"Sample 2 pileup has wrong number of elements");
|
||||
Assert.assertEquals(sample2Pileup.getReads().get(0),read2,"Sample 2 pileup has incorrect read");
|
||||
|
||||
ReadBackedPileup missingSamplePileup = pileup.getPileupForSample("missing");
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ class ContigScatterFunction extends GATKScatterFunction with InProcessFunction {
|
|||
// Include unmapped reads by default.
|
||||
this.includeUnmapped = true
|
||||
|
||||
override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount
|
||||
|
||||
protected override def maxIntervals = {
|
||||
GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).contigs.size
|
||||
}
|
||||
|
|
@ -44,3 +46,4 @@ class ContigScatterFunction extends GATKScatterFunction with InProcessFunction {
|
|||
IntervalUtils.scatterContigIntervals(gi.samFileHeader, gi.locs, this.scatterOutputFiles)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -53,9 +53,6 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
/** Whether the last scatter job should also include any unmapped reads. */
|
||||
protected var includeUnmapped: Boolean = _
|
||||
|
||||
/** The total number of clone jobs that will be created. */
|
||||
override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount
|
||||
|
||||
override def init() {
|
||||
this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
|
||||
this.referenceSequence = this.originalGATK.reference_sequence
|
||||
|
|
|
|||
|
|
@ -35,6 +35,8 @@ class IntervalScatterFunction extends GATKScatterFunction with InProcessFunction
|
|||
protected override def maxIntervals =
|
||||
GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals).locs.size
|
||||
|
||||
override def scatterCount = if (intervalFilesExist) super.scatterCount min this.maxIntervals else super.scatterCount
|
||||
|
||||
def run() {
|
||||
val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
|
||||
val splits = IntervalUtils.splitFixedIntervals(gi.locs, this.scatterOutputFiles.size)
|
||||
|
|
|
|||
|
|
@ -24,8 +24,21 @@
|
|||
|
||||
package org.broadinstitute.sting.queue.extensions.gatk
|
||||
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
|
||||
/**
|
||||
* For now returns an IntervalScatterFunction.
|
||||
* TODO: A scatter function that divides down to the locus level.
|
||||
* A scatter function that divides down to the locus level.
|
||||
*/
|
||||
class LocusScatterFunction extends IntervalScatterFunction {}
|
||||
//class LocusScatterFunction extends IntervalScatterFunction { }
|
||||
|
||||
class LocusScatterFunction extends GATKScatterFunction with InProcessFunction {
|
||||
protected override def maxIntervals = scatterCount
|
||||
|
||||
def run() {
|
||||
val gi = GATKScatterFunction.getGATKIntervals(this.referenceSequence, this.intervals)
|
||||
val splits = IntervalUtils.splitLocusIntervals(gi.locs, this.scatterOutputFiles.size)
|
||||
IntervalUtils.scatterFixedIntervals(gi.samFileHeader, splits, this.scatterOutputFiles)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.gatk
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Currently ReadScatterFunction only does ContigScattering, but it
|
||||
* could in principle do something more aggressive.
|
||||
*/
|
||||
class ReadScatterFunction extends ContigScatterFunction { }
|
||||
|
||||
Loading…
Reference in New Issue