VariantEvalWalker now takes indels if you throw the -dels flag. IndelLengthHistogram appears to be working properly, it is turned off by default (as it is experimental) but you can turn it on in your own repository.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3443 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-05-26 20:03:14 +00:00
parent a0bf1753f9
commit f9efc1248c
3 changed files with 42 additions and 17 deletions

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.playground.utils.report.utils.TableType; import org.broadinstitute.sting.playground.utils.report.utils.TableType;
import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.StingException;
@ -14,6 +15,7 @@ import org.broadinstitute.sting.utils.StingException;
* @Author chartl * @Author chartl
* @Date May 26, 2010 * @Date May 26, 2010
*/ */
@Analysis(name = "Indel length histograms", description = "Shows the distrbution of insertion/deletion event lengths (negative for deletion, positive for insertion)")
public class IndelLengthHistogram extends VariantEvaluator { public class IndelLengthHistogram extends VariantEvaluator {
private final int SIZE_LIMIT = 50; private final int SIZE_LIMIT = 50;
@DataPoint(name="indelLengthHistogram",description="Histogram of indel lengths") @DataPoint(name="indelLengthHistogram",description="Histogram of indel lengths")
@ -27,11 +29,11 @@ public class IndelLengthHistogram extends VariantEvaluator {
private Integer[] colKeys; private Integer[] colKeys;
private int limit; private int limit;
private String[] rowKeys = {"EventLength"}; private String[] rowKeys = {"EventLength"};
private int[] indelHistogram; private Integer[] indelHistogram;
public IndelHistogram(int limit) { public IndelHistogram(int limit) {
colKeys = initColKeys(limit); colKeys = initColKeys(limit);
indelHistogram = new int[limit*2]; indelHistogram = initHistogram(limit);
this.limit = limit; this.limit = limit;
} }
@ -50,7 +52,7 @@ public class IndelLengthHistogram extends VariantEvaluator {
private Integer[] initColKeys(int size) { private Integer[] initColKeys(int size) {
Integer[] cK = new Integer[size*2+1]; Integer[] cK = new Integer[size*2+1];
int index = 0; int index = 0;
for ( int i = -size; i < size; i ++ ) { for ( int i = -size; i <= size; i ++ ) {
cK[index] = i; cK[index] = i;
index++; index++;
} }
@ -58,6 +60,15 @@ public class IndelLengthHistogram extends VariantEvaluator {
return cK; return cK;
} }
private Integer[] initHistogram(int size) {
Integer[] hist = new Integer[size*2+1];
for ( int i = 0; i < 2*size+1; i ++ ) {
hist[i] = 0;
}
return hist;
}
public String getName() { return "indelHistTable"; } public String getName() { return "indelHistTable"; }
public void update(int eLength) { public void update(int eLength) {
@ -81,12 +92,14 @@ public class IndelLengthHistogram extends VariantEvaluator {
public int getComparisonOrder() { return 1; } // need only the evals public int getComparisonOrder() { return 1; } // need only the evals
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
//System.out.println("Update1 called");
if ( ! vc1.isBiallelic() && vc1.isIndel() ) { if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored."); veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output return vc1.toString(); // biallelic sites are output
} }
if ( vc1.isIndel() ) { if ( vc1.isIndel() ) {
//System.out.println("Is indel");
if ( vc1.isInsertion() ) { if ( vc1.isInsertion() ) {
indelHistogram.update(vc1.getAlternateAllele(0).length()); indelHistogram.update(vc1.getAlternateAllele(0).length());
} else if ( vc1.isDeletion() ) { } else if ( vc1.isDeletion() ) {

View File

@ -35,7 +35,9 @@ import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrde
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors; import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.playground.utils.report.ReportMarshaller; import org.broadinstitute.sting.playground.utils.report.ReportMarshaller;
import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory; import org.broadinstitute.sting.playground.utils.report.VE2ReportFactory;
import org.broadinstitute.sting.playground.utils.report.templates.ReportFormat; import org.broadinstitute.sting.playground.utils.report.templates.ReportFormat;
@ -103,6 +105,7 @@ import java.util.*;
/** /**
* Test routine for new VariantContext object * Test routine for new VariantContext object
*/ */
@Reference(window=@Window(start=-50,stop=50))
public class VariantEvalWalker extends RodWalker<Integer, Integer> { public class VariantEvalWalker extends RodWalker<Integer, Integer> {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
@ -152,10 +155,12 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
public double minQualScore = NO_MIN_QUAL_SCORE; public double minQualScore = NO_MIN_QUAL_SCORE;
@Argument(shortName = "Qcomp", fullName="minPhredConfidenceScoreForComp", doc="Minimum confidence score to consider a comp SNP a variant", required=false) @Argument(shortName = "Qcomp", fullName="minPhredConfidenceScoreForComp", doc="Minimum confidence score to consider a comp SNP a variant", required=false)
public double minCompQualScore = NO_MIN_QUAL_SCORE; public double minCompQualScore = NO_MIN_QUAL_SCORE;
@Argument(shortName = "dels", fullName="indelCalls", doc="evaluate indels rather than SNPs", required = false)
public boolean dels = false;
// Right now we will only be looking at SNPS // Right now we will only be looking at SNPS
// todo -- enable INDEL variant contexts, there's no reason not to but the integration tests need to be updated // todo -- enable INDEL variant contexts, there's no reason not to but the integration tests need to be updated
EnumSet<VariantContext.Type> ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION); //, VariantContext.Type.INDEL); EnumSet<VariantContext.Type> ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION);
@Argument(shortName="rsID", fullName="rsID", doc="If provided, list of rsID and build number for capping known snps by their build date", required=false) @Argument(shortName="rsID", fullName="rsID", doc="If provided, list of rsID and build number for capping known snps by their build date", required=false)
protected String rsIDFile = null; protected String rsIDFile = null;
@ -256,6 +261,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
public void initialize() { public void initialize() {
if ( dels ) {
ALLOW_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.NO_VARIATION);
}
ReportFormat.AcceptableOutputType type = (outputLocation == null) ? ReportFormat.AcceptableOutputType.STREAM : ReportFormat.AcceptableOutputType.FILE; ReportFormat.AcceptableOutputType type = (outputLocation == null) ? ReportFormat.AcceptableOutputType.STREAM : ReportFormat.AcceptableOutputType.FILE;
if (!VE2ReportFactory.isCompatibleWithOutputType(type,reportType)) if (!VE2ReportFactory.isCompatibleWithOutputType(type,reportType))
throw new StingException("The report format requested is not compatible with your output location. You specified a " + type + " output type which isn't an option for " + reportType); throw new StingException("The report format requested is not compatible with your output location. You specified a " + type + " output type which isn't an option for " + reportType);
@ -349,7 +357,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
classMap.put(c.getSimpleName(), c); classMap.put(c.getSimpleName(), c);
if ( USE_NO_MODULES ) { if ( USE_NO_MODULES ) {
evaluationClasses = new ArrayList<Class<? extends VariantEvaluator>>(0); evaluationClasses = new ArrayList<Class<? extends VariantEvaluator>>(0);
} else if ( modulesToUse.length == 0 ) { } else if ( modulesToUse.length == 0 ) {
evaluationClasses = new ArrayList<Class<? extends VariantEvaluator>>(classMap.values()); evaluationClasses = new ArrayList<Class<? extends VariantEvaluator>>(classMap.values());
} else { } else {
@ -442,6 +450,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
//System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases()); //System.out.printf("map at %s with %d skipped%n", context.getLocation(), context.getSkippedBases());
Map<String, VariantContext> vcs = getVariantContexts(ref, tracker, context); Map<String, VariantContext> vcs = getVariantContexts(ref, tracker, context);
//System.out.println("vcs has size "+vcs.size());
//Collection<VariantContext> comps = getCompVariantContexts(tracker, context); //Collection<VariantContext> comps = getCompVariantContexts(tracker, context);
// to enable walking over pairs where eval or comps have no elements // to enable walking over pairs where eval or comps have no elements
@ -581,6 +590,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
// todo -- allow the variant evaluation to specify the type of variants it wants to see and only take the first such record at a site // todo -- allow the variant evaluation to specify the type of variants it wants to see and only take the first such record at a site
Map<String, VariantContext> bindings = new HashMap<String, VariantContext>(); Map<String, VariantContext> bindings = new HashMap<String, VariantContext>();
if ( tracker != null ) { if ( tracker != null ) {
//System.out.println("Tracker is not null");
bindVariantContexts(ref, bindings, evalNames, tracker, context, false); bindVariantContexts(ref, bindings, evalNames, tracker, context, false);
bindVariantContexts(ref, bindings, compNames, tracker, context, true); bindVariantContexts(ref, bindings, compNames, tracker, context, true);
} }

View File

@ -32,7 +32,7 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
private HashMap<GenomeLoc,IntervalInfoBuilder> intervalBuffer = new HashMap<GenomeLoc,IntervalInfoBuilder>(); private HashMap<GenomeLoc,IntervalInfoBuilder> intervalBuffer = new HashMap<GenomeLoc,IntervalInfoBuilder>();
private HashSet<rodRefSeq> refseqBuffer = new HashSet<rodRefSeq>(); private HashSet<rodRefSeq> refseqBuffer = new HashSet<rodRefSeq>();
private HashMap<String,BEDFeature> currentBedFeatures; private HashMap<String,BEDFeature> currentBedFeatures = new HashMap<String,BEDFeature>();
public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
// three items to look up: interval_list, refseq, gene* // three items to look up: interval_list, refseq, gene*
@ -220,18 +220,20 @@ class IntervalInfoBuilder {
buf.append("\t"); buf.append("\t");
} }
buf.append(geneNames.get(geneIndex)); buf.append(geneNames.get(geneIndex));
buf.append("["); if ( ! geneNames.get(geneIndex).startsWith("gene")) {
if ( exonsByGene.get(geneNames.get(geneIndex)).size() > 0 ) { buf.append("[");
for ( int exonIndex = 0; exonIndex < exonsByGene.get(geneNames.get(geneIndex)).size(); exonIndex++ ) { if ( exonsByGene.get(geneNames.get(geneIndex)).size() > 0 ) {
if ( exonIndex > 0 ) { for ( int exonIndex = 0; exonIndex < exonsByGene.get(geneNames.get(geneIndex)).size(); exonIndex++ ) {
buf.append(','); if ( exonIndex > 0 ) {
} buf.append(',');
buf.append(String.format("exon_%d",exonNumbersByGene.get(geneNames.get(geneIndex)).get(exonIndex))); }
} buf.append(String.format("exon_%d",exonNumbersByGene.get(geneNames.get(geneIndex)).get(exonIndex)));
} else { }
buf.append("Intron/UTR"); } else {
buf.append("Intron/UTR");
}
buf.append("]");
} }
buf.append("]");
} }
return buf.toString(); return buf.toString();