Checkin for Eric (IndelDBRateWalker is a prelude to a VariantEval module for comparisons for indels)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3234 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c7b4f78316
commit
e7334ec11f
|
|
@ -110,4 +110,8 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{
|
||||||
return i + j;
|
return i + j;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(Long l) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,258 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||||
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderLine;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||||
|
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
|
||||||
|
*
|
||||||
|
* @Author chartl
|
||||||
|
* @Date Apr 21, 2010
|
||||||
|
*/
|
||||||
|
public class IndelDBRateWalker extends RodWalker<OverlapTable,OverlapTabulator> {
|
||||||
|
|
||||||
|
@Argument(fullName="indelWindow",doc="size of the window in which to look for indels; max 40",required=false)
|
||||||
|
int indelWindow = 10;
|
||||||
|
@Argument(fullName="writeVCF",doc="Writes \"overlapping\" variants to this vcf",required=false)
|
||||||
|
PrintStream outVCF;
|
||||||
|
|
||||||
|
VCFWriter vcfWriter;
|
||||||
|
|
||||||
|
private List<VariantContext> compContexts = new ArrayList<VariantContext>(50); // not going to be more than 50 contexts in a size-40 window
|
||||||
|
private List<VariantContext> evalContexts = new ArrayList<VariantContext>(50);
|
||||||
|
|
||||||
|
public void initialize() {
|
||||||
|
if ( indelWindow > 40 ) {
|
||||||
|
throw new StingException("Indel windows have a maximum size of 40");
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( outVCF != null ) {
|
||||||
|
vcfWriter = new VCFWriter(outVCF);
|
||||||
|
Set<VCFHeaderLine> header = new HashSet<VCFHeaderLine>();
|
||||||
|
header.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||||
|
VCFHeader vcfHeader = new VCFHeader(header, SampleUtils.getUniqueSamplesFromRods(getToolkit()));
|
||||||
|
vcfWriter.writeHeader(vcfHeader);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public OverlapTabulator reduceInit() {
|
||||||
|
return new OverlapTabulator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public OverlapTabulator reduce(OverlapTable oTable, OverlapTabulator tabulator) {
|
||||||
|
if ( oTable != null ) {
|
||||||
|
tabulator.update(oTable);
|
||||||
|
}
|
||||||
|
|
||||||
|
return tabulator;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void finalUpdate(OverlapTabulator tab) {
|
||||||
|
while ( ! evalContexts.isEmpty() ) {
|
||||||
|
tab.update(emptyOverlapTable());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void onTraversalDone(OverlapTabulator tabulation) {
|
||||||
|
finalUpdate(tabulation);
|
||||||
|
out.printf("%s\t%s\t%s\t%s\t%s%n","Num_eval_sites","Num_comp_within_2bp","Num_comp_within_4bp","Num_comp_within_window","Total_comp_%");
|
||||||
|
out.printf("%s",tabulation.tabulateString());
|
||||||
|
}
|
||||||
|
|
||||||
|
public OverlapTable map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
// step 1: get the eval and compare contexts
|
||||||
|
if ( tracker == null ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
Object eval = tracker.getReferenceMetaData("eval") != null &&
|
||||||
|
tracker.getReferenceMetaData("eval").size() != 0 ? tracker.getReferenceMetaData("eval").get(0) : null ;
|
||||||
|
VariantContext evalContext;
|
||||||
|
if ( eval != null ) {
|
||||||
|
evalContext = VariantContextAdaptors.toVariantContext("eval",eval);
|
||||||
|
} else {
|
||||||
|
evalContext = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
Object comp = tracker.getReferenceMetaData("comp") != null &&
|
||||||
|
tracker.getReferenceMetaData("comp").size() != 0 ? tracker.getReferenceMetaData("comp").get(0) : null;
|
||||||
|
VariantContext compContext;
|
||||||
|
if ( comp != null ) {
|
||||||
|
compContext = VariantContextAdaptors.toVariantContext("comp",comp);
|
||||||
|
} else {
|
||||||
|
compContext = null;
|
||||||
|
}
|
||||||
|
// step 2: add indel contexts to the queue
|
||||||
|
addToQueue(compContexts,compContext);
|
||||||
|
addToQueue(evalContexts,evalContext);
|
||||||
|
// step 3: check to see if we have exceeded the window size for the top eval contexts of the queue
|
||||||
|
// and do the work
|
||||||
|
return getOverlapTable(ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addToQueue(List<VariantContext> queue, VariantContext con) {
|
||||||
|
if ( con != null && con.isIndel() ) {
|
||||||
|
queue.add(con);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public OverlapTable getOverlapTable(ReferenceContext ref) {
|
||||||
|
// step 1: check that the eval queue is non-empty and that we are outside the window
|
||||||
|
if ( evalContexts.isEmpty() || evalContexts.get(0).getLocation().distance(ref.getLocus()) <= indelWindow ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
// step 2: discard all comp variations which come before the window
|
||||||
|
while ( ! compContexts.isEmpty() && compContexts.get(0).getLocation().isBefore(ref.getLocus()) &&
|
||||||
|
compContexts.get(0).getLocation().distance(ref.getLocus()) > indelWindow) {
|
||||||
|
compContexts.remove(0);
|
||||||
|
}
|
||||||
|
// step 3: see if there are any contexts left; if so then they must be within the window
|
||||||
|
if ( ! compContexts.isEmpty() ) {
|
||||||
|
return nonEmptyOverlapTable();
|
||||||
|
} else {
|
||||||
|
return emptyOverlapTable();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public OverlapTable emptyOverlapTable() {
|
||||||
|
// only eval, no comp
|
||||||
|
OverlapTable ot = new OverlapTable();
|
||||||
|
ot.setEvalSizeAndType(evalContexts.get(0));
|
||||||
|
return ot;
|
||||||
|
}
|
||||||
|
|
||||||
|
public OverlapTable nonEmptyOverlapTable() {
|
||||||
|
if ( vcfWriter != null ) {
|
||||||
|
int i = 0;
|
||||||
|
while ( i < compContexts.size() && compContexts.get(i).getLocation().isBefore(evalContexts.get(0).getLocation())) {
|
||||||
|
vcfWriter.addRecord(VariantContextAdaptors.toVCF(compContexts.get(i),(char)compContexts.get(i).getReference().getBases()[0]));
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
vcfWriter.addRecord(VariantContextAdaptors.toVCF(evalContexts.get(0),(char) evalContexts.get(0).getReference().getBases()[0]));
|
||||||
|
while ( i < compContexts.size() && compContexts.get(i).getLocation().distance(evalContexts.get(0).getLocation()) <= indelWindow) {
|
||||||
|
vcfWriter.addRecord(VariantContextAdaptors.toVCF(compContexts.get(i),(char) compContexts.get(i).getReference().getBases()[0]));
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
OverlapTable ot = new OverlapTable();
|
||||||
|
ot.setCompOverlaps(compContexts.size());
|
||||||
|
ot.setDistances(compContexts,evalContexts.get(0), indelWindow);
|
||||||
|
return ot;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
class OverlapTable {
|
||||||
|
|
||||||
|
private int numOverlaps;
|
||||||
|
private ExpandingArrayList<Integer> distances; // currently unused
|
||||||
|
private int evalSize;
|
||||||
|
private boolean isDeletion;
|
||||||
|
|
||||||
|
public OverlapTable() {
|
||||||
|
numOverlaps = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEvalSizeAndType(VariantContext context) {
|
||||||
|
int size = context.getAlternateAllele(0).length();
|
||||||
|
evalSize = size;
|
||||||
|
isDeletion = context.isDeletion();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCompOverlaps(int overlaps) {
|
||||||
|
numOverlaps = overlaps;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setDistances(List<VariantContext> comps, VariantContext eval, int winsize) {
|
||||||
|
distances = new ExpandingArrayList<Integer>();
|
||||||
|
for ( VariantContext comp : comps ) {
|
||||||
|
if ( comp.getLocation().distance(eval.getLocation()) <= winsize ) {
|
||||||
|
distances.add(comp.getLocation().distance(eval.getLocation()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getNumOverlaps() {
|
||||||
|
return numOverlaps;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getSize() {
|
||||||
|
return evalSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isDeletion() {
|
||||||
|
return isDeletion;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ExpandingArrayList<Integer> getDistances() {
|
||||||
|
return distances;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class OverlapTabulator {
|
||||||
|
HashMap<Integer,Long> hitsToCounts;
|
||||||
|
int totalEvalVariants;
|
||||||
|
|
||||||
|
public OverlapTabulator() {
|
||||||
|
hitsToCounts = new HashMap<Integer,Long>();
|
||||||
|
totalEvalVariants = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void update(OverlapTable table) {
|
||||||
|
totalEvalVariants++;
|
||||||
|
if ( table.getNumOverlaps() != 0 ) {
|
||||||
|
if ( hitsToCounts.containsKey(table.getNumOverlaps()) ) {
|
||||||
|
hitsToCounts.put(table.getNumOverlaps(),hitsToCounts.get(table.getNumOverlaps())+1);
|
||||||
|
} else {
|
||||||
|
hitsToCounts.put(table.getNumOverlaps(),1l);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String tabulateString() {
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
sb.append(totalEvalVariants);
|
||||||
|
sb.append("\t");
|
||||||
|
long lt2counts = 0l;
|
||||||
|
long lt4counts = 0l;
|
||||||
|
long totalCounts = 0l;
|
||||||
|
for ( int i = 0; i < 40; i ++ ) {
|
||||||
|
long counts = 0;
|
||||||
|
if ( hitsToCounts.containsKey(i) ) {
|
||||||
|
counts = hitsToCounts.get(i);
|
||||||
|
}
|
||||||
|
if ( i <= 2 ) {
|
||||||
|
lt2counts += counts;
|
||||||
|
}
|
||||||
|
if ( i <= 4 ) {
|
||||||
|
lt4counts += counts;
|
||||||
|
}
|
||||||
|
totalCounts += counts;
|
||||||
|
}
|
||||||
|
sb.append(lt2counts);
|
||||||
|
sb.append("\t");
|
||||||
|
sb.append(lt4counts);
|
||||||
|
sb.append("\t");
|
||||||
|
sb.append(totalCounts);
|
||||||
|
sb.append("\t");
|
||||||
|
sb.append(String.format("%.2f", 100*( (double) totalCounts)/( (double) totalEvalVariants)));
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue