Support for list of known CNVs in VariantEval
-- VariantSummary now includes novelty of CNVs by reciprocal overlap detection using the standard variant eval -knownCNVs argument -- Genericizes loading for intervals into interval tree by chromosome -- GenomeLoc methods for reciprocal overlap detection, with unit tests
This commit is contained in:
parent
28b286ad39
commit
3060a4a15e
|
|
@ -1,10 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.util.IntervalTree;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -30,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
|
@ -189,6 +192,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
@Input(fullName="stratIntervals", shortName="stratIntervals", doc="File containing tribble-readable features for the IntervalStratificiation", required=false)
|
||||
public IntervalBinding<Feature> intervalsFile = null;
|
||||
|
||||
/**
|
||||
* File containing tribble-readable features containing known CNVs. For use with VariantSummary table.
|
||||
*/
|
||||
@Input(fullName="knownCNVs", shortName="knownCNVs", doc="File containing tribble-readable features describing a known list of copy number variants", required=false)
|
||||
public IntervalBinding<Feature> knownCNVsFile = null;
|
||||
Map<String, IntervalTree<GenomeLoc>> knownCNVsByContig = Collections.emptyMap();
|
||||
|
||||
// Variables
|
||||
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
||||
|
||||
|
|
@ -295,6 +305,28 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
throw new ReviewedStingException(String.format("The ancestral alignments file, '%s', could not be found", ancestralAlignmentsFile.getAbsolutePath()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// initialize CNVs
|
||||
if ( knownCNVsFile != null ) {
|
||||
knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile);
|
||||
}
|
||||
}
|
||||
|
||||
public final Map<String, IntervalTree<GenomeLoc>> createIntervalTreeByContig(final IntervalBinding<Feature> intervals) {
|
||||
final Map<String, IntervalTree<GenomeLoc>> byContig = new HashMap<String, IntervalTree<GenomeLoc>>();
|
||||
|
||||
final List<GenomeLoc> locs = intervals.getIntervals(getToolkit());
|
||||
|
||||
// set up the map from contig -> interval tree
|
||||
for ( final String contig : getContigNames() )
|
||||
byContig.put(contig, new IntervalTree<GenomeLoc>());
|
||||
|
||||
for ( final GenomeLoc loc : locs ) {
|
||||
byContig.get(loc.getContig()).put(loc.getStart(), loc.getStop(), loc);
|
||||
}
|
||||
|
||||
return byContig;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -549,14 +581,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
|
||||
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
|
||||
|
||||
public List<GenomeLoc> getIntervals() {
|
||||
if ( intervalsFile == null )
|
||||
throw new UserException.MissingArgument("stratIntervals", "Must be provided when IntervalStratification is enabled");
|
||||
|
||||
return intervalsFile.getIntervals(getToolkit());
|
||||
}
|
||||
|
||||
|
||||
public Set<String> getContigNames() {
|
||||
final TreeSet<String> contigs = new TreeSet<String>();
|
||||
for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) {
|
||||
|
|
@ -568,4 +592,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
public GenomeLocParser getGenomeLocParser() {
|
||||
return getToolkit().getGenomeLocParser();
|
||||
}
|
||||
|
||||
public GenomeAnalysisEngine getToolkit() {
|
||||
return super.getToolkit();
|
||||
}
|
||||
}
|
||||
|
|
@ -24,25 +24,38 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import net.sf.picard.util.IntervalTree;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.EnumMap;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
@Analysis(description = "1000 Genomes Phase I summary of variants table")
|
||||
public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||
final protected static Logger logger = Logger.getLogger(VariantSummary.class);
|
||||
|
||||
private final static int MAX_INDEL_LENGTH = 50;
|
||||
private final static double MIN_CNV_OVERLAP = 0.5;
|
||||
private VariantEvalWalker walker;
|
||||
|
||||
public enum Type {
|
||||
SNP, INDEL, CNV
|
||||
}
|
||||
|
||||
Map<String, IntervalTree<GenomeLoc>> knownCNVs = null;
|
||||
|
||||
// basic counts on various rates found
|
||||
@DataPoint(description = "Number of samples")
|
||||
public long nSamples = 0;
|
||||
|
|
@ -86,10 +99,10 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
|
||||
private final static String ALL = "ALL";
|
||||
|
||||
private class TypeSampleMap extends EnumMap<VariantContext.Type, Map<String, Integer>> {
|
||||
private class TypeSampleMap extends EnumMap<Type, Map<String, Integer>> {
|
||||
public TypeSampleMap(final Collection<String> samples) {
|
||||
super(VariantContext.Type.class);
|
||||
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
||||
super(Type.class);
|
||||
for ( Type type : Type.values() ) {
|
||||
Map<String, Integer> bySample = new HashMap<String, Integer>(samples.size());
|
||||
for ( final String sample : samples ) {
|
||||
bySample.put(sample, 0);
|
||||
|
|
@ -99,16 +112,16 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
}
|
||||
|
||||
public final void inc(final VariantContext.Type type, final String sample) {
|
||||
public final void inc(final Type type, final String sample) {
|
||||
final int count = this.get(type).get(sample);
|
||||
get(type).put(sample, count + 1);
|
||||
}
|
||||
|
||||
public final int all(VariantContext.Type type) {
|
||||
public final int all(Type type) {
|
||||
return get(type).get(ALL);
|
||||
}
|
||||
|
||||
public final int meanValue(VariantContext.Type type) {
|
||||
public final int meanValue(Type type) {
|
||||
long sum = 0;
|
||||
int n = 0;
|
||||
for ( final Map.Entry<String, Integer> pair : get(type).entrySet() ) {
|
||||
|
|
@ -120,7 +133,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
return (int)(Math.round(sum / (1.0 * n)));
|
||||
}
|
||||
|
||||
public final double ratioValue(VariantContext.Type type, TypeSampleMap denoms, boolean allP) {
|
||||
public final double ratioValue(Type type, TypeSampleMap denoms, boolean allP) {
|
||||
double sum = 0;
|
||||
int n = 0;
|
||||
for ( final String sample : get(type).keySet() ) {
|
||||
|
|
@ -137,6 +150,8 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {
|
||||
this.walker = walker;
|
||||
|
||||
nSamples = walker.getSampleNamesForEvaluation().size();
|
||||
countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||
transitionsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||
|
|
@ -144,6 +159,13 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
allVariantCounts = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||
knownVariantCounts = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||
depthPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||
|
||||
if ( walker.knownCNVsFile != null ) {
|
||||
knownCNVs = walker.createIntervalTreeByContig(walker.knownCNVsFile);
|
||||
final List<GenomeLoc> locs = walker.knownCNVsFile.getIntervals(walker.getToolkit());
|
||||
logger.info(String.format("Creating known CNV list %s containing %d intervals covering %d bp",
|
||||
walker.knownCNVsFile.getSource(), locs.size(), IntervalUtils.intervalSize(locs)));
|
||||
}
|
||||
}
|
||||
|
||||
@Override public boolean enabled() { return true; }
|
||||
|
|
@ -156,44 +178,77 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
|
||||
}
|
||||
|
||||
private final Type getType(VariantContext vc) {
|
||||
switch (vc.getType()) {
|
||||
case SNP:
|
||||
return Type.SNP;
|
||||
case INDEL:
|
||||
for ( int l : vc.getIndelLengths() )
|
||||
if ( l > MAX_INDEL_LENGTH )
|
||||
return Type.CNV;
|
||||
return Type.INDEL;
|
||||
case SYMBOLIC:
|
||||
return Type.CNV;
|
||||
default:
|
||||
throw new UserException.BadInput("Unexpected variant context type: " + vc);
|
||||
}
|
||||
}
|
||||
|
||||
private final boolean overlapsKnownCNV(VariantContext cnv) {
|
||||
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
|
||||
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
|
||||
|
||||
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
|
||||
while ( nodeIt.hasNext() ) {
|
||||
final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
|
||||
if ( overlapP > MIN_CNV_OVERLAP )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( eval == null || eval.isMonomorphicInSamples() ) return null;
|
||||
|
||||
final Type type = getType(eval);
|
||||
|
||||
TypeSampleMap titvTable = null;
|
||||
|
||||
switch (eval.getType()) {
|
||||
case SNP:
|
||||
titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
|
||||
titvTable.inc(eval.getType(), ALL);
|
||||
case INDEL:
|
||||
case SYMBOLIC:
|
||||
allVariantCounts.inc(eval.getType(), ALL);
|
||||
if ( comp != null )
|
||||
knownVariantCounts.inc(eval.getType(), ALL);
|
||||
if ( eval.hasAttribute(VCFConstants.DEPTH_KEY) )
|
||||
depthPerSample.inc(eval.getType(), ALL);
|
||||
break;
|
||||
default:
|
||||
throw new UserException.BadInput("Unexpected variant context type: " + eval);
|
||||
// update DP, if possible
|
||||
if ( eval.hasAttribute(VCFConstants.DEPTH_KEY) )
|
||||
depthPerSample.inc(type, ALL);
|
||||
|
||||
// update counts
|
||||
allVariantCounts.inc(type, ALL);
|
||||
|
||||
// type specific calculations
|
||||
if ( type == Type.SNP ) {
|
||||
titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
|
||||
titvTable.inc(type, ALL);
|
||||
}
|
||||
|
||||
// novelty calculation
|
||||
if ( comp != null || (type == Type.CNV && overlapsKnownCNV(eval)))
|
||||
knownVariantCounts.inc(type, ALL);
|
||||
|
||||
// per sample metrics
|
||||
for (final Genotype g : eval.getGenotypes()) {
|
||||
if ( ! g.isNoCall() && ! g.isHomRef() ) {
|
||||
countsPerSample.inc(eval.getType(), g.getSampleName());
|
||||
countsPerSample.inc(type, g.getSampleName());
|
||||
|
||||
// update transition / transversion ratio
|
||||
if ( titvTable != null ) titvTable.inc(eval.getType(), g.getSampleName());
|
||||
if ( titvTable != null ) titvTable.inc(type, g.getSampleName());
|
||||
|
||||
if ( g.hasAttribute(VCFConstants.DEPTH_KEY) )
|
||||
depthPerSample.inc(eval.getType(), g.getSampleName());
|
||||
depthPerSample.inc(type, g.getSampleName());
|
||||
}
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
private final String noveltyRate(VariantContext.Type type) {
|
||||
private final String noveltyRate(Type type) {
|
||||
final int all = allVariantCounts.all(type);
|
||||
final int known = knownVariantCounts.all(type);
|
||||
final int novel = all - known;
|
||||
|
|
@ -202,22 +257,22 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
nSNPs = allVariantCounts.all(VariantContext.Type.SNP);
|
||||
nIndels = allVariantCounts.all(VariantContext.Type.INDEL);
|
||||
nSVs = allVariantCounts.all(VariantContext.Type.SYMBOLIC);
|
||||
nSNPs = allVariantCounts.all(Type.SNP);
|
||||
nIndels = allVariantCounts.all(Type.INDEL);
|
||||
nSVs = allVariantCounts.all(Type.CNV);
|
||||
|
||||
TiTvRatio = transitionsPerSample.ratioValue(VariantContext.Type.SNP, transversionsPerSample, true);
|
||||
TiTvRatioPerSample = transitionsPerSample.ratioValue(VariantContext.Type.SNP, transversionsPerSample, false);
|
||||
TiTvRatio = transitionsPerSample.ratioValue(Type.SNP, transversionsPerSample, true);
|
||||
TiTvRatioPerSample = transitionsPerSample.ratioValue(Type.SNP, transversionsPerSample, false);
|
||||
|
||||
nSNPsPerSample = countsPerSample.meanValue(VariantContext.Type.SNP);
|
||||
nIndelsPerSample = countsPerSample.meanValue(VariantContext.Type.INDEL);
|
||||
nSVsPerSample = countsPerSample.meanValue(VariantContext.Type.SYMBOLIC);
|
||||
nSNPsPerSample = countsPerSample.meanValue(Type.SNP);
|
||||
nIndelsPerSample = countsPerSample.meanValue(Type.INDEL);
|
||||
nSVsPerSample = countsPerSample.meanValue(Type.CNV);
|
||||
|
||||
SNPNoveltyRate = noveltyRate(VariantContext.Type.SNP);
|
||||
IndelNoveltyRate = noveltyRate(VariantContext.Type.INDEL);
|
||||
SVNoveltyRate = noveltyRate(VariantContext.Type.SYMBOLIC);
|
||||
SNPNoveltyRate = noveltyRate(Type.SNP);
|
||||
IndelNoveltyRate = noveltyRate(Type.INDEL);
|
||||
SVNoveltyRate = noveltyRate(Type.CNV);
|
||||
|
||||
SNPDPPerSample = depthPerSample.meanValue(VariantContext.Type.SNP);
|
||||
IndelDPPerSample = depthPerSample.meanValue(VariantContext.Type.INDEL);
|
||||
SNPDPPerSample = depthPerSample.meanValue(Type.SNP);
|
||||
IndelDPPerSample = depthPerSample.meanValue(Type.INDEL);
|
||||
}
|
||||
}
|
||||
|
|
@ -54,26 +54,23 @@ import java.util.*;
|
|||
*/
|
||||
public class IntervalStratification extends VariantStratifier {
|
||||
final protected static Logger logger = Logger.getLogger(IntervalStratification.class);
|
||||
final Map<String, IntervalTree<Boolean>> intervalTreeByContig = new HashMap<String, IntervalTree<Boolean>>();
|
||||
Map<String, IntervalTree<GenomeLoc>> intervalTreeByContig = null;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
final List<GenomeLoc> locs = getVariantEvalWalker().getIntervals();
|
||||
if ( getVariantEvalWalker().intervalsFile == null )
|
||||
throw new UserException.MissingArgument("stratIntervals", "Must be provided when IntervalStratification is enabled");
|
||||
|
||||
final List<GenomeLoc> locs = getVariantEvalWalker().intervalsFile.getIntervals(getVariantEvalWalker().getToolkit());
|
||||
|
||||
if ( locs.isEmpty() )
|
||||
throw new UserException.BadArgumentValue("stratIntervals", "Contains no intervals. Perhaps the file is malformed or empty?");
|
||||
|
||||
intervalTreeByContig = getVariantEvalWalker().createIntervalTreeByContig(getVariantEvalWalker().intervalsFile);
|
||||
|
||||
logger.info(String.format("Creating IntervalStratification %s containing %d intervals covering %d bp",
|
||||
getVariantEvalWalker().intervalsFile.getSource(), locs.size(), IntervalUtils.intervalSize(locs)));
|
||||
|
||||
// set up the map from contig -> interval tree
|
||||
for ( final String contig : getVariantEvalWalker().getContigNames() )
|
||||
intervalTreeByContig.put(contig, new IntervalTree<Boolean>());
|
||||
|
||||
for ( final GenomeLoc loc : locs ) {
|
||||
intervalTreeByContig.get(loc.getContig()).put(loc.getStart(), loc.getStop(), true);
|
||||
}
|
||||
|
||||
states = new ArrayList<String>(Arrays.asList("all", "overlaps.intervals", "outside.intervals"));
|
||||
}
|
||||
|
||||
|
|
@ -82,8 +79,8 @@ public class IntervalStratification extends VariantStratifier {
|
|||
|
||||
if (eval != null) {
|
||||
final GenomeLoc loc = getVariantEvalWalker().getGenomeLocParser().createGenomeLoc(eval, true);
|
||||
IntervalTree<Boolean> intervalTree = intervalTreeByContig.get(loc.getContig());
|
||||
IntervalTree.Node<Boolean> node = intervalTree.minOverlapper(loc.getStart(), loc.getStop());
|
||||
IntervalTree<GenomeLoc> intervalTree = intervalTreeByContig.get(loc.getContig());
|
||||
IntervalTree.Node<GenomeLoc> node = intervalTree.minOverlapper(loc.getStart(), loc.getStop());
|
||||
//logger.info(String.format("Overlap %s found %s", loc, node));
|
||||
relevantStates.add( node != null ? "overlaps.intervals" : "outside.intervals");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -440,4 +440,29 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
return stop - start + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* reciprocialOverlap: what is the min. percent of gl1 and gl2 covered by both
|
||||
*
|
||||
* gl1.s ---------- gk1.e
|
||||
* gl2.s ---------- gl2.e
|
||||
* 100%
|
||||
*
|
||||
* gl1.s ---------- gk1.e
|
||||
* gl2.s ---------- gl2.e
|
||||
* 50%
|
||||
*
|
||||
* gl1.s ---------- gk1.e
|
||||
* gl2.s -------------------- gl2.e
|
||||
* 25% (50% for gl1 but only 25% for gl2)
|
||||
*/
|
||||
public final double reciprocialOverlapFraction(final GenomeLoc o) {
|
||||
if ( overlapsP(o) )
|
||||
return Math.min(overlapPercent(this, o), overlapPercent(o, this));
|
||||
else
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
private final static double overlapPercent(final GenomeLoc gl1, final GenomeLoc gl2) {
|
||||
return (1.0 * gl1.intersect(gl2).size()) / gl1.size();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ public abstract class BaseTest {
|
|||
*/
|
||||
public static class TestDataProvider {
|
||||
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
|
||||
private final String name;
|
||||
private String name;
|
||||
|
||||
/**
|
||||
* Create a new TestDataProvider instance bound to the class variable C
|
||||
|
|
@ -151,6 +151,10 @@ public abstract class BaseTest {
|
|||
this(c, "");
|
||||
}
|
||||
|
||||
public void setName(final String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all of the data providers in the form expected by TestNG of type class C
|
||||
* @param c
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
|
@ -150,4 +151,64 @@ public class GenomeLocUnitTest extends BaseTest {
|
|||
Assert.assertEquals(twoUnmappedMixed.size(),2,"Wrong number of elements in list.");
|
||||
Assert.assertEquals(twoUnmappedMixed,Arrays.asList(chr1,unmapped),"List sorted in wrong order");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// testing overlap detection
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
private class ReciprocalOverlapProvider extends TestDataProvider {
|
||||
GenomeLoc gl1, gl2;
|
||||
int overlapSize;
|
||||
double overlapFraction;
|
||||
|
||||
private ReciprocalOverlapProvider(int start1, int stop1, int start2, int stop2) {
|
||||
super(ReciprocalOverlapProvider.class);
|
||||
gl1 = genomeLocParser.createGenomeLoc("chr1", start1, stop1);
|
||||
gl2 = genomeLocParser.createGenomeLoc("chr1", start2, stop2);
|
||||
|
||||
int shared = 0;
|
||||
for ( int i = start1; i <= stop1; i++ ) {
|
||||
if ( i >= start2 && i <= stop2 )
|
||||
shared++;
|
||||
}
|
||||
|
||||
this.overlapSize = shared;
|
||||
this.overlapFraction = Math.min((1.0*shared)/gl1.size(), (1.0*shared)/gl2.size());
|
||||
super.setName(String.format("%d-%d / %d-%d overlap=%d / %.2f", start1, stop1, start2, stop2, overlapSize, overlapFraction));
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "ReciprocalOverlapProvider")
|
||||
public Object[][] makeReciprocalOverlapProvider() {
|
||||
for ( int start1 = 1; start1 <= 10; start1++ ) {
|
||||
for ( int stop1 = start1; stop1 <= 10; stop1++ ) {
|
||||
new ReciprocalOverlapProvider(start1, stop1, 1, 10);
|
||||
new ReciprocalOverlapProvider(start1, stop1, 5, 10);
|
||||
new ReciprocalOverlapProvider(start1, stop1, 5, 7);
|
||||
new ReciprocalOverlapProvider(start1, stop1, 5, 15);
|
||||
new ReciprocalOverlapProvider(start1, stop1, 11, 20);
|
||||
|
||||
new ReciprocalOverlapProvider(1, 10, start1, stop1);
|
||||
new ReciprocalOverlapProvider(5, 10, start1, stop1);
|
||||
new ReciprocalOverlapProvider(5, 7, start1, stop1);
|
||||
new ReciprocalOverlapProvider(5, 15, start1, stop1);
|
||||
new ReciprocalOverlapProvider(11, 20, start1, stop1);
|
||||
}
|
||||
}
|
||||
|
||||
return ReciprocalOverlapProvider.getTests(ReciprocalOverlapProvider.class);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ReciprocalOverlapProvider")
|
||||
public void testReciprocalOverlapProvider(ReciprocalOverlapProvider cfg) {
|
||||
if ( cfg.overlapSize == 0 ) {
|
||||
Assert.assertFalse(cfg.gl1.overlapsP(cfg.gl2));
|
||||
} else {
|
||||
Assert.assertTrue(cfg.gl1.overlapsP(cfg.gl2));
|
||||
Assert.assertEquals(cfg.gl1.intersect(cfg.gl2).size(), cfg.overlapSize);
|
||||
Assert.assertEquals(cfg.gl1.reciprocialOverlapFraction(cfg.gl2), cfg.overlapFraction);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue