Support for list of known CNVs in VariantEval
-- VariantSummary now reports the novelty of CNVs, determined by reciprocal overlap detection against the list supplied via the standard VariantEval -knownCNVs argument -- Generalizes the loading of intervals into per-contig (chromosome) interval trees -- Adds GenomeLoc methods for reciprocal overlap detection, with unit tests
This commit is contained in:
parent
28b286ad39
commit
3060a4a15e
|
|
@ -1,10 +1,12 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.picard.util.IntervalTree;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.*;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -30,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||||
|
|
@ -189,6 +192,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
@Input(fullName="stratIntervals", shortName="stratIntervals", doc="File containing tribble-readable features for the IntervalStratificiation", required=false)
|
@Input(fullName="stratIntervals", shortName="stratIntervals", doc="File containing tribble-readable features for the IntervalStratificiation", required=false)
|
||||||
public IntervalBinding<Feature> intervalsFile = null;
|
public IntervalBinding<Feature> intervalsFile = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* File containing tribble-readable features containing known CNVs. For use with VariantSummary table.
|
||||||
|
*/
|
||||||
|
@Input(fullName="knownCNVs", shortName="knownCNVs", doc="File containing tribble-readable features describing a known list of copy number variants", required=false)
|
||||||
|
public IntervalBinding<Feature> knownCNVsFile = null;
|
||||||
|
Map<String, IntervalTree<GenomeLoc>> knownCNVsByContig = Collections.emptyMap();
|
||||||
|
|
||||||
// Variables
|
// Variables
|
||||||
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
||||||
|
|
||||||
|
|
@ -295,6 +305,28 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
throw new ReviewedStingException(String.format("The ancestral alignments file, '%s', could not be found", ancestralAlignmentsFile.getAbsolutePath()));
|
throw new ReviewedStingException(String.format("The ancestral alignments file, '%s', could not be found", ancestralAlignmentsFile.getAbsolutePath()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// initialize CNVs
|
||||||
|
if ( knownCNVsFile != null ) {
|
||||||
|
knownCNVsByContig = createIntervalTreeByContig(knownCNVsFile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public final Map<String, IntervalTree<GenomeLoc>> createIntervalTreeByContig(final IntervalBinding<Feature> intervals) {
|
||||||
|
final Map<String, IntervalTree<GenomeLoc>> byContig = new HashMap<String, IntervalTree<GenomeLoc>>();
|
||||||
|
|
||||||
|
final List<GenomeLoc> locs = intervals.getIntervals(getToolkit());
|
||||||
|
|
||||||
|
// set up the map from contig -> interval tree
|
||||||
|
for ( final String contig : getContigNames() )
|
||||||
|
byContig.put(contig, new IntervalTree<GenomeLoc>());
|
||||||
|
|
||||||
|
for ( final GenomeLoc loc : locs ) {
|
||||||
|
byContig.get(loc.getContig()).put(loc.getStart(), loc.getStop(), loc);
|
||||||
|
}
|
||||||
|
|
||||||
|
return byContig;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -549,14 +581,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
|
|
||||||
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
|
public Set<SortableJexlVCMatchExp> getJexlExpressions() { return jexlExpressions; }
|
||||||
|
|
||||||
public List<GenomeLoc> getIntervals() {
|
|
||||||
if ( intervalsFile == null )
|
|
||||||
throw new UserException.MissingArgument("stratIntervals", "Must be provided when IntervalStratification is enabled");
|
|
||||||
|
|
||||||
return intervalsFile.getIntervals(getToolkit());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public Set<String> getContigNames() {
|
public Set<String> getContigNames() {
|
||||||
final TreeSet<String> contigs = new TreeSet<String>();
|
final TreeSet<String> contigs = new TreeSet<String>();
|
||||||
for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) {
|
for( final SAMSequenceRecord r : getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences()) {
|
||||||
|
|
@ -568,4 +592,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
public GenomeLocParser getGenomeLocParser() {
|
public GenomeLocParser getGenomeLocParser() {
|
||||||
return getToolkit().getGenomeLocParser();
|
return getToolkit().getGenomeLocParser();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public GenomeAnalysisEngine getToolkit() {
|
||||||
|
return super.getToolkit();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -24,25 +24,38 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||||
|
|
||||||
|
import net.sf.picard.util.IntervalTree;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.*;
|
||||||
import java.util.EnumMap;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@Analysis(description = "1000 Genomes Phase I summary of variants table")
|
@Analysis(description = "1000 Genomes Phase I summary of variants table")
|
||||||
public class VariantSummary extends VariantEvaluator implements StandardEval {
|
public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||||
|
final protected static Logger logger = Logger.getLogger(VariantSummary.class);
|
||||||
|
|
||||||
|
private final static int MAX_INDEL_LENGTH = 50;
|
||||||
|
private final static double MIN_CNV_OVERLAP = 0.5;
|
||||||
|
private VariantEvalWalker walker;
|
||||||
|
|
||||||
|
public enum Type {
|
||||||
|
SNP, INDEL, CNV
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, IntervalTree<GenomeLoc>> knownCNVs = null;
|
||||||
|
|
||||||
// basic counts on various rates found
|
// basic counts on various rates found
|
||||||
@DataPoint(description = "Number of samples")
|
@DataPoint(description = "Number of samples")
|
||||||
public long nSamples = 0;
|
public long nSamples = 0;
|
||||||
|
|
@ -86,10 +99,10 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||||
|
|
||||||
private final static String ALL = "ALL";
|
private final static String ALL = "ALL";
|
||||||
|
|
||||||
private class TypeSampleMap extends EnumMap<VariantContext.Type, Map<String, Integer>> {
|
private class TypeSampleMap extends EnumMap<Type, Map<String, Integer>> {
|
||||||
public TypeSampleMap(final Collection<String> samples) {
|
public TypeSampleMap(final Collection<String> samples) {
|
||||||
super(VariantContext.Type.class);
|
super(Type.class);
|
||||||
for ( VariantContext.Type type : VariantContext.Type.values() ) {
|
for ( Type type : Type.values() ) {
|
||||||
Map<String, Integer> bySample = new HashMap<String, Integer>(samples.size());
|
Map<String, Integer> bySample = new HashMap<String, Integer>(samples.size());
|
||||||
for ( final String sample : samples ) {
|
for ( final String sample : samples ) {
|
||||||
bySample.put(sample, 0);
|
bySample.put(sample, 0);
|
||||||
|
|
@ -99,16 +112,16 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void inc(final VariantContext.Type type, final String sample) {
|
public final void inc(final Type type, final String sample) {
|
||||||
final int count = this.get(type).get(sample);
|
final int count = this.get(type).get(sample);
|
||||||
get(type).put(sample, count + 1);
|
get(type).put(sample, count + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int all(VariantContext.Type type) {
|
public final int all(Type type) {
|
||||||
return get(type).get(ALL);
|
return get(type).get(ALL);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int meanValue(VariantContext.Type type) {
|
public final int meanValue(Type type) {
|
||||||
long sum = 0;
|
long sum = 0;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for ( final Map.Entry<String, Integer> pair : get(type).entrySet() ) {
|
for ( final Map.Entry<String, Integer> pair : get(type).entrySet() ) {
|
||||||
|
|
@ -120,7 +133,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||||
return (int)(Math.round(sum / (1.0 * n)));
|
return (int)(Math.round(sum / (1.0 * n)));
|
||||||
}
|
}
|
||||||
|
|
||||||
public final double ratioValue(VariantContext.Type type, TypeSampleMap denoms, boolean allP) {
|
public final double ratioValue(Type type, TypeSampleMap denoms, boolean allP) {
|
||||||
double sum = 0;
|
double sum = 0;
|
||||||
int n = 0;
|
int n = 0;
|
||||||
for ( final String sample : get(type).keySet() ) {
|
for ( final String sample : get(type).keySet() ) {
|
||||||
|
|
@ -137,6 +150,8 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||||
|
|
||||||
|
|
||||||
public void initialize(VariantEvalWalker walker) {
|
public void initialize(VariantEvalWalker walker) {
|
||||||
|
this.walker = walker;
|
||||||
|
|
||||||
nSamples = walker.getSampleNamesForEvaluation().size();
|
nSamples = walker.getSampleNamesForEvaluation().size();
|
||||||
countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||||
transitionsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
transitionsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||||
|
|
@ -144,6 +159,13 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||||
allVariantCounts = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
allVariantCounts = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||||
knownVariantCounts = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
knownVariantCounts = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||||
depthPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
depthPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
|
||||||
|
|
||||||
|
if ( walker.knownCNVsFile != null ) {
|
||||||
|
knownCNVs = walker.createIntervalTreeByContig(walker.knownCNVsFile);
|
||||||
|
final List<GenomeLoc> locs = walker.knownCNVsFile.getIntervals(walker.getToolkit());
|
||||||
|
logger.info(String.format("Creating known CNV list %s containing %d intervals covering %d bp",
|
||||||
|
walker.knownCNVsFile.getSource(), locs.size(), IntervalUtils.intervalSize(locs)));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override public boolean enabled() { return true; }
|
@Override public boolean enabled() { return true; }
|
||||||
|
|
@ -156,44 +178,77 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||||
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
|
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final Type getType(VariantContext vc) {
|
||||||
|
switch (vc.getType()) {
|
||||||
|
case SNP:
|
||||||
|
return Type.SNP;
|
||||||
|
case INDEL:
|
||||||
|
for ( int l : vc.getIndelLengths() )
|
||||||
|
if ( l > MAX_INDEL_LENGTH )
|
||||||
|
return Type.CNV;
|
||||||
|
return Type.INDEL;
|
||||||
|
case SYMBOLIC:
|
||||||
|
return Type.CNV;
|
||||||
|
default:
|
||||||
|
throw new UserException.BadInput("Unexpected variant context type: " + vc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final boolean overlapsKnownCNV(VariantContext cnv) {
|
||||||
|
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
|
||||||
|
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
|
||||||
|
|
||||||
|
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
|
||||||
|
while ( nodeIt.hasNext() ) {
|
||||||
|
final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
|
||||||
|
if ( overlapP > MIN_CNV_OVERLAP )
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if ( eval == null || eval.isMonomorphicInSamples() ) return null;
|
if ( eval == null || eval.isMonomorphicInSamples() ) return null;
|
||||||
|
|
||||||
|
final Type type = getType(eval);
|
||||||
|
|
||||||
TypeSampleMap titvTable = null;
|
TypeSampleMap titvTable = null;
|
||||||
|
|
||||||
switch (eval.getType()) {
|
// update DP, if possible
|
||||||
case SNP:
|
if ( eval.hasAttribute(VCFConstants.DEPTH_KEY) )
|
||||||
titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
|
depthPerSample.inc(type, ALL);
|
||||||
titvTable.inc(eval.getType(), ALL);
|
|
||||||
case INDEL:
|
// update counts
|
||||||
case SYMBOLIC:
|
allVariantCounts.inc(type, ALL);
|
||||||
allVariantCounts.inc(eval.getType(), ALL);
|
|
||||||
if ( comp != null )
|
// type specific calculations
|
||||||
knownVariantCounts.inc(eval.getType(), ALL);
|
if ( type == Type.SNP ) {
|
||||||
if ( eval.hasAttribute(VCFConstants.DEPTH_KEY) )
|
titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
|
||||||
depthPerSample.inc(eval.getType(), ALL);
|
titvTable.inc(type, ALL);
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new UserException.BadInput("Unexpected variant context type: " + eval);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// novelty calculation
|
||||||
|
if ( comp != null || (type == Type.CNV && overlapsKnownCNV(eval)))
|
||||||
|
knownVariantCounts.inc(type, ALL);
|
||||||
|
|
||||||
// per sample metrics
|
// per sample metrics
|
||||||
for (final Genotype g : eval.getGenotypes()) {
|
for (final Genotype g : eval.getGenotypes()) {
|
||||||
if ( ! g.isNoCall() && ! g.isHomRef() ) {
|
if ( ! g.isNoCall() && ! g.isHomRef() ) {
|
||||||
countsPerSample.inc(eval.getType(), g.getSampleName());
|
countsPerSample.inc(type, g.getSampleName());
|
||||||
|
|
||||||
// update transition / transversion ratio
|
// update transition / transversion ratio
|
||||||
if ( titvTable != null ) titvTable.inc(eval.getType(), g.getSampleName());
|
if ( titvTable != null ) titvTable.inc(type, g.getSampleName());
|
||||||
|
|
||||||
if ( g.hasAttribute(VCFConstants.DEPTH_KEY) )
|
if ( g.hasAttribute(VCFConstants.DEPTH_KEY) )
|
||||||
depthPerSample.inc(eval.getType(), g.getSampleName());
|
depthPerSample.inc(type, g.getSampleName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return null; // we don't capture any interesting sites
|
return null; // we don't capture any interesting sites
|
||||||
}
|
}
|
||||||
|
|
||||||
private final String noveltyRate(VariantContext.Type type) {
|
private final String noveltyRate(Type type) {
|
||||||
final int all = allVariantCounts.all(type);
|
final int all = allVariantCounts.all(type);
|
||||||
final int known = knownVariantCounts.all(type);
|
final int known = knownVariantCounts.all(type);
|
||||||
final int novel = all - known;
|
final int novel = all - known;
|
||||||
|
|
@ -202,22 +257,22 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void finalizeEvaluation() {
|
public void finalizeEvaluation() {
|
||||||
nSNPs = allVariantCounts.all(VariantContext.Type.SNP);
|
nSNPs = allVariantCounts.all(Type.SNP);
|
||||||
nIndels = allVariantCounts.all(VariantContext.Type.INDEL);
|
nIndels = allVariantCounts.all(Type.INDEL);
|
||||||
nSVs = allVariantCounts.all(VariantContext.Type.SYMBOLIC);
|
nSVs = allVariantCounts.all(Type.CNV);
|
||||||
|
|
||||||
TiTvRatio = transitionsPerSample.ratioValue(VariantContext.Type.SNP, transversionsPerSample, true);
|
TiTvRatio = transitionsPerSample.ratioValue(Type.SNP, transversionsPerSample, true);
|
||||||
TiTvRatioPerSample = transitionsPerSample.ratioValue(VariantContext.Type.SNP, transversionsPerSample, false);
|
TiTvRatioPerSample = transitionsPerSample.ratioValue(Type.SNP, transversionsPerSample, false);
|
||||||
|
|
||||||
nSNPsPerSample = countsPerSample.meanValue(VariantContext.Type.SNP);
|
nSNPsPerSample = countsPerSample.meanValue(Type.SNP);
|
||||||
nIndelsPerSample = countsPerSample.meanValue(VariantContext.Type.INDEL);
|
nIndelsPerSample = countsPerSample.meanValue(Type.INDEL);
|
||||||
nSVsPerSample = countsPerSample.meanValue(VariantContext.Type.SYMBOLIC);
|
nSVsPerSample = countsPerSample.meanValue(Type.CNV);
|
||||||
|
|
||||||
SNPNoveltyRate = noveltyRate(VariantContext.Type.SNP);
|
SNPNoveltyRate = noveltyRate(Type.SNP);
|
||||||
IndelNoveltyRate = noveltyRate(VariantContext.Type.INDEL);
|
IndelNoveltyRate = noveltyRate(Type.INDEL);
|
||||||
SVNoveltyRate = noveltyRate(VariantContext.Type.SYMBOLIC);
|
SVNoveltyRate = noveltyRate(Type.CNV);
|
||||||
|
|
||||||
SNPDPPerSample = depthPerSample.meanValue(VariantContext.Type.SNP);
|
SNPDPPerSample = depthPerSample.meanValue(Type.SNP);
|
||||||
IndelDPPerSample = depthPerSample.meanValue(VariantContext.Type.INDEL);
|
IndelDPPerSample = depthPerSample.meanValue(Type.INDEL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -54,26 +54,23 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public class IntervalStratification extends VariantStratifier {
|
public class IntervalStratification extends VariantStratifier {
|
||||||
final protected static Logger logger = Logger.getLogger(IntervalStratification.class);
|
final protected static Logger logger = Logger.getLogger(IntervalStratification.class);
|
||||||
final Map<String, IntervalTree<Boolean>> intervalTreeByContig = new HashMap<String, IntervalTree<Boolean>>();
|
Map<String, IntervalTree<GenomeLoc>> intervalTreeByContig = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
final List<GenomeLoc> locs = getVariantEvalWalker().getIntervals();
|
if ( getVariantEvalWalker().intervalsFile == null )
|
||||||
|
throw new UserException.MissingArgument("stratIntervals", "Must be provided when IntervalStratification is enabled");
|
||||||
|
|
||||||
|
final List<GenomeLoc> locs = getVariantEvalWalker().intervalsFile.getIntervals(getVariantEvalWalker().getToolkit());
|
||||||
|
|
||||||
if ( locs.isEmpty() )
|
if ( locs.isEmpty() )
|
||||||
throw new UserException.BadArgumentValue("stratIntervals", "Contains no intervals. Perhaps the file is malformed or empty?");
|
throw new UserException.BadArgumentValue("stratIntervals", "Contains no intervals. Perhaps the file is malformed or empty?");
|
||||||
|
|
||||||
|
intervalTreeByContig = getVariantEvalWalker().createIntervalTreeByContig(getVariantEvalWalker().intervalsFile);
|
||||||
|
|
||||||
logger.info(String.format("Creating IntervalStratification %s containing %d intervals covering %d bp",
|
logger.info(String.format("Creating IntervalStratification %s containing %d intervals covering %d bp",
|
||||||
getVariantEvalWalker().intervalsFile.getSource(), locs.size(), IntervalUtils.intervalSize(locs)));
|
getVariantEvalWalker().intervalsFile.getSource(), locs.size(), IntervalUtils.intervalSize(locs)));
|
||||||
|
|
||||||
// set up the map from contig -> interval tree
|
|
||||||
for ( final String contig : getVariantEvalWalker().getContigNames() )
|
|
||||||
intervalTreeByContig.put(contig, new IntervalTree<Boolean>());
|
|
||||||
|
|
||||||
for ( final GenomeLoc loc : locs ) {
|
|
||||||
intervalTreeByContig.get(loc.getContig()).put(loc.getStart(), loc.getStop(), true);
|
|
||||||
}
|
|
||||||
|
|
||||||
states = new ArrayList<String>(Arrays.asList("all", "overlaps.intervals", "outside.intervals"));
|
states = new ArrayList<String>(Arrays.asList("all", "overlaps.intervals", "outside.intervals"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,8 +79,8 @@ public class IntervalStratification extends VariantStratifier {
|
||||||
|
|
||||||
if (eval != null) {
|
if (eval != null) {
|
||||||
final GenomeLoc loc = getVariantEvalWalker().getGenomeLocParser().createGenomeLoc(eval, true);
|
final GenomeLoc loc = getVariantEvalWalker().getGenomeLocParser().createGenomeLoc(eval, true);
|
||||||
IntervalTree<Boolean> intervalTree = intervalTreeByContig.get(loc.getContig());
|
IntervalTree<GenomeLoc> intervalTree = intervalTreeByContig.get(loc.getContig());
|
||||||
IntervalTree.Node<Boolean> node = intervalTree.minOverlapper(loc.getStart(), loc.getStop());
|
IntervalTree.Node<GenomeLoc> node = intervalTree.minOverlapper(loc.getStart(), loc.getStop());
|
||||||
//logger.info(String.format("Overlap %s found %s", loc, node));
|
//logger.info(String.format("Overlap %s found %s", loc, node));
|
||||||
relevantStates.add( node != null ? "overlaps.intervals" : "outside.intervals");
|
relevantStates.add( node != null ? "overlaps.intervals" : "outside.intervals");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -440,4 +440,29 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
||||||
return stop - start + 1;
|
return stop - start + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* reciprocialOverlap: what is the min. percent of gl1 and gl2 covered by both
|
||||||
|
*
|
||||||
|
* gl1.s ---------- gk1.e
|
||||||
|
* gl2.s ---------- gl2.e
|
||||||
|
* 100%
|
||||||
|
*
|
||||||
|
* gl1.s ---------- gk1.e
|
||||||
|
* gl2.s ---------- gl2.e
|
||||||
|
* 50%
|
||||||
|
*
|
||||||
|
* gl1.s ---------- gk1.e
|
||||||
|
* gl2.s -------------------- gl2.e
|
||||||
|
* 25% (50% for gl1 but only 25% for gl2)
|
||||||
|
*/
|
||||||
|
public final double reciprocialOverlapFraction(final GenomeLoc o) {
|
||||||
|
if ( overlapsP(o) )
|
||||||
|
return Math.min(overlapPercent(this, o), overlapPercent(o, this));
|
||||||
|
else
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final static double overlapPercent(final GenomeLoc gl1, final GenomeLoc gl2) {
|
||||||
|
return (1.0 * gl1.intersect(gl2).size()) / gl1.size();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@ public abstract class BaseTest {
|
||||||
*/
|
*/
|
||||||
public static class TestDataProvider {
|
public static class TestDataProvider {
|
||||||
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
|
private static final Map<Class, List<Object>> tests = new HashMap<Class, List<Object>>();
|
||||||
private final String name;
|
private String name;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new TestDataProvider instance bound to the class variable C
|
* Create a new TestDataProvider instance bound to the class variable C
|
||||||
|
|
@ -151,6 +151,10 @@ public abstract class BaseTest {
|
||||||
this(c, "");
|
this(c, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setName(final String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return all of the data providers in the form expected by TestNG of type class C
|
* Return all of the data providers in the form expected by TestNG of type class C
|
||||||
* @param c
|
* @param c
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.BeforeClass;
|
import org.testng.annotations.BeforeClass;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
|
@ -150,4 +151,64 @@ public class GenomeLocUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(twoUnmappedMixed.size(),2,"Wrong number of elements in list.");
|
Assert.assertEquals(twoUnmappedMixed.size(),2,"Wrong number of elements in list.");
|
||||||
Assert.assertEquals(twoUnmappedMixed,Arrays.asList(chr1,unmapped),"List sorted in wrong order");
|
Assert.assertEquals(twoUnmappedMixed,Arrays.asList(chr1,unmapped),"List sorted in wrong order");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// testing overlap detection
|
||||||
|
//
|
||||||
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class ReciprocalOverlapProvider extends TestDataProvider {
|
||||||
|
GenomeLoc gl1, gl2;
|
||||||
|
int overlapSize;
|
||||||
|
double overlapFraction;
|
||||||
|
|
||||||
|
private ReciprocalOverlapProvider(int start1, int stop1, int start2, int stop2) {
|
||||||
|
super(ReciprocalOverlapProvider.class);
|
||||||
|
gl1 = genomeLocParser.createGenomeLoc("chr1", start1, stop1);
|
||||||
|
gl2 = genomeLocParser.createGenomeLoc("chr1", start2, stop2);
|
||||||
|
|
||||||
|
int shared = 0;
|
||||||
|
for ( int i = start1; i <= stop1; i++ ) {
|
||||||
|
if ( i >= start2 && i <= stop2 )
|
||||||
|
shared++;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.overlapSize = shared;
|
||||||
|
this.overlapFraction = Math.min((1.0*shared)/gl1.size(), (1.0*shared)/gl2.size());
|
||||||
|
super.setName(String.format("%d-%d / %d-%d overlap=%d / %.2f", start1, stop1, start2, stop2, overlapSize, overlapFraction));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "ReciprocalOverlapProvider")
|
||||||
|
public Object[][] makeReciprocalOverlapProvider() {
|
||||||
|
for ( int start1 = 1; start1 <= 10; start1++ ) {
|
||||||
|
for ( int stop1 = start1; stop1 <= 10; stop1++ ) {
|
||||||
|
new ReciprocalOverlapProvider(start1, stop1, 1, 10);
|
||||||
|
new ReciprocalOverlapProvider(start1, stop1, 5, 10);
|
||||||
|
new ReciprocalOverlapProvider(start1, stop1, 5, 7);
|
||||||
|
new ReciprocalOverlapProvider(start1, stop1, 5, 15);
|
||||||
|
new ReciprocalOverlapProvider(start1, stop1, 11, 20);
|
||||||
|
|
||||||
|
new ReciprocalOverlapProvider(1, 10, start1, stop1);
|
||||||
|
new ReciprocalOverlapProvider(5, 10, start1, stop1);
|
||||||
|
new ReciprocalOverlapProvider(5, 7, start1, stop1);
|
||||||
|
new ReciprocalOverlapProvider(5, 15, start1, stop1);
|
||||||
|
new ReciprocalOverlapProvider(11, 20, start1, stop1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ReciprocalOverlapProvider.getTests(ReciprocalOverlapProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "ReciprocalOverlapProvider")
|
||||||
|
public void testReciprocalOverlapProvider(ReciprocalOverlapProvider cfg) {
|
||||||
|
if ( cfg.overlapSize == 0 ) {
|
||||||
|
Assert.assertFalse(cfg.gl1.overlapsP(cfg.gl2));
|
||||||
|
} else {
|
||||||
|
Assert.assertTrue(cfg.gl1.overlapsP(cfg.gl2));
|
||||||
|
Assert.assertEquals(cfg.gl1.intersect(cfg.gl2).size(), cfg.overlapSize);
|
||||||
|
Assert.assertEquals(cfg.gl1.reciprocialOverlapFraction(cfg.gl2), cfg.overlapFraction);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue