Computes SNP density over the genome. Doesn't work with intervals

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2735 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-01-29 15:36:49 +00:00
parent 9decd20f46
commit 0a7426c29c
1 changed files with 114 additions and 0 deletions

View File

@ -0,0 +1,114 @@
package org.broadinstitute.sting.playground.gatk.walkers.diagnostics;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.PackageUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.playground.gatk.walkers.varianteval.VariantAnalysis;
import org.broadinstitute.sting.playground.gatk.walkers.diagnostics.newvarianteval.VariantEvaluation;
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContext;
import org.broadinstitute.sting.oneoffprojects.variantcontext.VariantContextAdaptors;
import org.apache.commons.jexl.ExpressionFactory;
import org.apache.commons.jexl.Expression;
import org.apache.commons.jexl.JexlHelper;
import org.apache.commons.jexl.JexlContext;
import java.util.*;
/**
* Computes the density of SNPs passing and failing filters in intervals on the genome and emits a table for display
*/
@By(DataSource.REFERENCE)
@Requires(value={},referenceMetaData=@RMD(name="eval",type=RodVCF.class))
public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDensity.Counter> {
@Argument(fullName="granularity", shortName="granularity", doc="", required=false)
private int granularity = 1000000;
public void initialize() {
out.printf("chr middlePos linearPos nSNPs nSNPsFiltered unfiltered.density filtered.density%n");
}
public class Counter {
GenomeLoc firstLoc = null;
long linearOffset = 0;
int nSNPsCalled = 0;
int nSNPsFiltered = 0;
public Counter(Long linearOffset) {
this.linearOffset = linearOffset;
//System.out.printf("linear offset %d%n", linearOffset);
}
}
public Pair<VariantContext, GenomeLoc> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
VariantContext vc = null;
RODRecordList<ReferenceOrderedDatum> vcfList = tracker.getTrackData("eval", null);
if (vcfList != null) {
for (ReferenceOrderedDatum d : vcfList) {
RodVCF vcfRecord = (RodVCF)d;
vc = VariantContextAdaptors.vcfToVariantContext(vcfRecord);
break;
}
}
return new Pair<VariantContext, GenomeLoc>(vc, context.getLocation());
}
public Counter reduceInit() {
return new Counter(0L);
}
private void printLine(Counter sum) {
long offset = granularity / 2 - 1;
long chrOffset = sum.firstLoc.getStart() + offset;
out.printf("%s %d %d %d %d %.2e %.2e%n",
sum.firstLoc.getContig(),
chrOffset,
sum.linearOffset + offset,
sum.nSNPsCalled, sum.nSNPsFiltered,
(1.0 * sum.nSNPsCalled) / granularity, (1.0 * sum.nSNPsFiltered) / granularity);
}
public Counter reduce(Pair<VariantContext, GenomeLoc> p, Counter sum) {
if ( p == null )
return sum;
// System.out.printf("%s %s %d%n", c.getLocation(), sum.firstLoc, sum.nSNPsSeen);
VariantContext c = p.getFirst();
GenomeLoc loc = p.getSecond();
if ( sum.firstLoc != null ) {
long dist = loc.distance(sum.firstLoc);
// System.out.printf(" dist = %d%n", dist);
if ( dist > granularity ) {
printLine(sum);
sum = new Counter(sum.linearOffset + granularity);
}
}
if ( sum.firstLoc == null ) sum.firstLoc = loc;
sum.nSNPsCalled += c != null && c.isNotFiltered() ? 1 : 0;
sum.nSNPsFiltered += c != null && c.isFiltered() ? 1 : 0;
return sum;
}
public void onTraversalDone(Counter sum) {
printLine(sum);
}
}