VariantEvalWalker now takes indels if you throw the -dels flag. IndelLengthHistogram appears to be working properly, it is turned off by default (as it is experimental) but you can turn it on in your own repository.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3443 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a0bf1753f9
commit
f9efc1248c
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
|
@ -14,6 +15,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
* @Author chartl
|
||||
* @Date May 26, 2010
|
||||
*/
|
||||
@Analysis(name = "Indel length histograms", description = "Shows the distrbution of insertion/deletion event lengths (negative for deletion, positive for insertion)")
|
||||
public class IndelLengthHistogram extends VariantEvaluator {
|
||||
private final int SIZE_LIMIT = 50;
|
||||
@DataPoint(name="indelLengthHistogram",description="Histogram of indel lengths")
|
||||
|
|
@ -27,11 +29,11 @@ public class IndelLengthHistogram extends VariantEvaluator {
|
|||
private Integer[] colKeys;
|
||||
private int limit;
|
||||
private String[] rowKeys = {"EventLength"};
|
||||
private int[] indelHistogram;
|
||||
private Integer[] indelHistogram;
|
||||
|
||||
public IndelHistogram(int limit) {
|
||||
colKeys = initColKeys(limit);
|
||||
indelHistogram = new int[limit*2];
|
||||
indelHistogram = initHistogram(limit);
|
||||
this.limit = limit;
|
||||
}
|
||||
|
||||
|
|
@ -50,7 +52,7 @@ public class IndelLengthHistogram extends VariantEvaluator {
|
|||
private Integer[] initColKeys(int size) {
|
||||
Integer[] cK = new Integer[size*2+1];
|
||||
int index = 0;
|
||||
for ( int i = -size; i < size; i ++ ) {
|
||||
for ( int i = -size; i <= size; i ++ ) {
|
||||
cK[index] = i;
|
||||
index++;
|
||||
}
|
||||
|
|
@ -58,6 +60,15 @@ public class IndelLengthHistogram extends VariantEvaluator {
|
|||
return cK;
|
||||
}
|
||||
|
||||
private Integer[] initHistogram(int size) {
|
||||
Integer[] hist = new Integer[size*2+1];
|
||||
for ( int i = 0; i < 2*size+1; i ++ ) {
|
||||
hist[i] = 0;
|
||||
}
|
||||
|
||||
return hist;
|
||||
}
|
||||
|
||||
public String getName() { return "indelHistTable"; }
|
||||
|
||||
public void update(int eLength) {
|
||||
|
|
@ -81,12 +92,14 @@ public class IndelLengthHistogram extends VariantEvaluator {
|
|||
public int getComparisonOrder() { return 1; } // need only the evals
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
//System.out.println("Update1 called");
|
||||
if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
|
||||
veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
|
||||
return vc1.toString(); // biallelic sites are output
|
||||
}
|
||||
|
||||
if ( vc1.isIndel() ) {
|
||||
//System.out.println("Is indel");
|
||||
if ( vc1.isInsertion() ) {
|
||||
indelHistogram.update(vc1.getAlternateAllele(0).length());
|
||||
} else if ( vc1.isDeletion() ) {
|
||||
|
|
|
|||
|
|
@ -35,7 +35,9 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.playground.utils.report.ReportMarshaller;
|
||||
import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory;
|
||||
import org.broadinstitute.sting.playground.utils.report.templates.ReportFormat;
|
||||
|
|
@ -103,6 +105,7 @@ import java.util.*;
|
|||
/**
|
||||
* Test routine for new VariantContext object
|
||||
*/
|
||||
@Reference(window=@Window(start=-50,stop=50))
|
||||
public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -152,10 +155,12 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
public double minQualScore = NO_MIN_QUAL_SCORE;
|
||||
@Argument(shortName = "Qcomp", fullName="minPhredConfidenceScoreForComp", doc="Minimum confidence score to consider a comp SNP a variant", required=false)
|
||||
public double minCompQualScore = NO_MIN_QUAL_SCORE;
|
||||
@Argument(shortName = "dels", fullName="indelCalls", doc="evaluate indels rather than SNPs", required = false)
|
||||
public boolean dels = false;
|
||||
|
||||
// Right now we will only be looking at SNPS
|
||||
// todo -- enable INDEL variant contexts, there's no reason not to but the integration tests need to be updated
|
||||
EnumSet<VariantContext.Type> ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION); //, VariantContext.Type.INDEL);
|
||||
EnumSet<VariantContext.Type> ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION);
|
||||
|
||||
@Argument(shortName="rsID", fullName="rsID", doc="If provided, list of rsID and build number for capping known snps by their build date", required=false)
|
||||
protected String rsIDFile = null;
|
||||
|
|
@ -256,6 +261,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public void initialize() {
|
||||
if ( dels ) {
|
||||
ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.NO_VARIATION);
|
||||
}
|
||||
ReportFormat.AcceptableOutputType type = (outputLocation == null) ? ReportFormat.AcceptableOutputType.STREAM : ReportFormat.AcceptableOutputType.FILE;
|
||||
if (!VE2ReportFactory.isCompatibleWithOutputType(type,reportType))
|
||||
throw new StingException("The report format requested is not compatible with your output location. You specified a " + type + " output type which isn't an option for " + reportType);
|
||||
|
|
@ -349,7 +357,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
classMap.put(c.getSimpleName(), c);
|
||||
|
||||
if ( USE_NO_MODULES ) {
|
||||
evaluationClasses = new ArrayList<Class<? extends VariantEvaluator>>(0);
|
||||
evaluationClasses = new ArrayList<Class<? extends VariantEvaluator>>(0);
|
||||
} else if ( modulesToUse.length == 0 ) {
|
||||
evaluationClasses = new ArrayList<Class<? extends VariantEvaluator>>(classMap.values());
|
||||
} else {
|
||||
|
|
@ -442,6 +450,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
//System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases());
|
||||
|
||||
Map<String, VariantContext> vcs = getVariantContexts(ref, tracker, context);
|
||||
//System.out.println("vcs has size "+vcs.size());
|
||||
//Collection<VariantContext> comps = getCompVariantContexts(tracker, context);
|
||||
|
||||
// to enable walking over pairs where eval or comps have no elements
|
||||
|
|
@ -581,6 +590,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
// todo -- allow the variant evaluation to specify the type of variants it wants to see and only take the first such record at a site
|
||||
Map<String, VariantContext> bindings = new HashMap<String, VariantContext>();
|
||||
if ( tracker != null ) {
|
||||
//System.out.println("Tracker is not null");
|
||||
bindVariantContexts(ref, bindings, evalNames, tracker, context, false);
|
||||
bindVariantContexts(ref, bindings, compNames, tracker, context, true);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
|
|||
|
||||
private HashMap<GenomeLoc,IntervalInfoBuilder> intervalBuffer = new HashMap<GenomeLoc,IntervalInfoBuilder>();
|
||||
private HashSet<rodRefSeq> refseqBuffer = new HashSet<rodRefSeq>();
|
||||
private HashMap<String,BEDFeature> currentBedFeatures;
|
||||
private HashMap<String,BEDFeature> currentBedFeatures = new HashMap<String,BEDFeature>();
|
||||
|
||||
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
// three items to look up: interval_list, refseq, gene*
|
||||
|
|
@ -220,18 +220,20 @@ class IntervalInfoBuilder {
|
|||
buf.append("\t");
|
||||
}
|
||||
buf.append(geneNames.get(geneIndex));
|
||||
buf.append("[");
|
||||
if ( exonsByGene.get(geneNames.get(geneIndex)).size() > 0 ) {
|
||||
for ( int exonIndex = 0; exonIndex < exonsByGene.get(geneNames.get(geneIndex)).size(); exonIndex++ ) {
|
||||
if ( exonIndex > 0 ) {
|
||||
buf.append(',');
|
||||
}
|
||||
buf.append(String.format("exon_%d",exonNumbersByGene.get(geneNames.get(geneIndex)).get(exonIndex)));
|
||||
}
|
||||
} else {
|
||||
buf.append("Intron/UTR");
|
||||
if ( ! geneNames.get(geneIndex).startsWith("gene")) {
|
||||
buf.append("[");
|
||||
if ( exonsByGene.get(geneNames.get(geneIndex)).size() > 0 ) {
|
||||
for ( int exonIndex = 0; exonIndex < exonsByGene.get(geneNames.get(geneIndex)).size(); exonIndex++ ) {
|
||||
if ( exonIndex > 0 ) {
|
||||
buf.append(',');
|
||||
}
|
||||
buf.append(String.format("exon_%d",exonNumbersByGene.get(geneNames.get(geneIndex)).get(exonIndex)));
|
||||
}
|
||||
} else {
|
||||
buf.append("Intron/UTR");
|
||||
}
|
||||
buf.append("]");
|
||||
}
|
||||
buf.append("]");
|
||||
}
|
||||
|
||||
return buf.toString();
|
||||
|
|
|
|||
Loading…
Reference in New Issue