Computes SNP density over the genome. Doesn't work with intervals
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2735 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9decd20f46
commit
0a7426c29c
|
|
@ -0,0 +1,114 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.PackageUtils;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval.VariantEvaluation;
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors;
|
||||||
|
import org.apache.commons.jexl.ExpressionFactory;
|
||||||
|
import org.apache.commons.jexl.Expression;
|
||||||
|
import org.apache.commons.jexl.JexlHelper;
|
||||||
|
import org.apache.commons.jexl.JexlContext;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Computes the density of SNPs passing and failing filters in intervals on the genome and emits a table for display
|
||||||
|
*/
|
||||||
|
@By(DataSource.REFERENCE)
|
||||||
|
@Requires(value={},referenceMetaData=@RMD(name="eval",type=RodVCF.class))
|
||||||
|
public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDensity.Counter> {
|
||||||
|
@Argument(fullName="granularity", shortName="granularity", doc="", required=false)
|
||||||
|
private int granularity = 1000000;
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
out.printf("chr middlePos linearPos nSNPs nSNPsFiltered unfiltered.density filtered.density%n");
|
||||||
|
}
|
||||||
|
|
||||||
|
public class Counter {
|
||||||
|
GenomeLoc firstLoc = null;
|
||||||
|
long linearOffset = 0;
|
||||||
|
int nSNPsCalled = 0;
|
||||||
|
int nSNPsFiltered = 0;
|
||||||
|
|
||||||
|
public Counter(Long linearOffset) {
|
||||||
|
this.linearOffset = linearOffset;
|
||||||
|
//System.out.printf("linear offset %d%n", linearOffset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<VariantContext, GenomeLoc> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
VariantContext vc = null;
|
||||||
|
|
||||||
|
RODRecordList<ReferenceOrderedDatum> vcfList = tracker.getTrackData("eval", null);
|
||||||
|
if (vcfList != null) {
|
||||||
|
for (ReferenceOrderedDatum d : vcfList) {
|
||||||
|
RodVCF vcfRecord = (RodVCF)d;
|
||||||
|
vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return new Pair<VariantContext, GenomeLoc>(vc, context.getLocation());
|
||||||
|
}
|
||||||
|
|
||||||
|
public Counter reduceInit() {
|
||||||
|
return new Counter(0L);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void printLine(Counter sum) {
|
||||||
|
long offset = granularity / 2 - 1;
|
||||||
|
long chrOffset = sum.firstLoc.getStart() + offset;
|
||||||
|
out.printf("%s %d %d %d %d %.2e %.2e%n",
|
||||||
|
sum.firstLoc.getContig(),
|
||||||
|
chrOffset,
|
||||||
|
sum.linearOffset + offset,
|
||||||
|
sum.nSNPsCalled, sum.nSNPsFiltered,
|
||||||
|
(1.0 * sum.nSNPsCalled) / granularity, (1.0 * sum.nSNPsFiltered) / granularity);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Counter reduce(Pair<VariantContext, GenomeLoc> p, Counter sum) {
|
||||||
|
if ( p == null )
|
||||||
|
return sum;
|
||||||
|
|
||||||
|
// System.out.printf("%s %s %d%n", c.getLocation(), sum.firstLoc, sum.nSNPsSeen);
|
||||||
|
VariantContext c = p.getFirst();
|
||||||
|
GenomeLoc loc = p.getSecond();
|
||||||
|
|
||||||
|
if ( sum.firstLoc != null ) {
|
||||||
|
long dist = loc.distance(sum.firstLoc);
|
||||||
|
// System.out.printf(" dist = %d%n", dist);
|
||||||
|
if ( dist > granularity ) {
|
||||||
|
printLine(sum);
|
||||||
|
sum = new Counter(sum.linearOffset + granularity);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( sum.firstLoc == null ) sum.firstLoc = loc;
|
||||||
|
|
||||||
|
sum.nSNPsCalled += c != null && c.isNotFiltered() ? 1 : 0;
|
||||||
|
sum.nSNPsFiltered += c != null && c.isFiltered() ? 1 : 0;
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Counter sum) {
|
||||||
|
printLine(sum);
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue