From 77243d0df1590dfebac9bfc88dd1cc14ffed201a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 13 Mar 2012 16:31:51 -0400 Subject: [PATCH] Splitting up the MultiallelicSummary module into the standard part for use by all and the dev piece used just by me --- .../evaluators/MultiallelicAFs.java | 116 ++---------------- .../evaluators/MultiallelicSummary.java | 80 +----------- 2 files changed, 9 insertions(+), 187 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java index 056b54945..7ed179c32 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicAFs.java @@ -40,57 +40,13 @@ import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; @Analysis(description = "Evaluation summary for multi-allelic variants") -public class MultiallelicSummary extends VariantEvaluator { // implements StandardEval { - final protected static Logger logger = Logger.getLogger(MultiallelicSummary.class); +public class MultiallelicAFs extends VariantEvaluator { + final protected static Logger logger = Logger.getLogger(MultiallelicAFs.class); public enum Type { SNP, INDEL } - // basic counts on various rates found - @DataPoint(description = "Number of processed loci", format = "%d") - public long nProcessedLoci = 0; - - @DataPoint(description = "Number of SNPs", format = "%d") - public int nSNPs = 0; - @DataPoint(description = "Number of multi-allelic SNPs", format = "%d") - public int nMultiSNPs = 0; - @DataPoint(description = "% processed sites that are multi-allelic SNPs", format = "%.5f") - public double processedMultiSnpRatio = 0; - @DataPoint(description = "% SNP sites that are multi-allelic", format = "%.3f") - public double variantMultiSnpRatio = 0; - - @DataPoint(description = "Number of Indels", format = "%d") - public int nIndels = 0; - @DataPoint(description = "Number of multi-allelic Indels", format = "%d") - public int nMultiIndels = 0; - @DataPoint(description = "% processed sites that are multi-allelic Indels", format = "%.5f") - public double processedMultiIndelRatio = 0; - @DataPoint(description = "% Indel sites that are multi-allelic", format = "%.3f") - public double variantMultiIndelRatio = 0; - - @DataPoint(description = "Number of Transitions", format = "%d") - public int nTi = 0; - @DataPoint(description = "Number of Transversions", format = "%d") - public int nTv = 0; - @DataPoint(description = "Overall TiTv ratio", format = "%.2f") - public double TiTvRatio = 0; - - @DataPoint(description = "Multi-allelic SNPs partially known", format = "%d") - public int knownSNPsPartial = 0; - @DataPoint(description = "Multi-allelic SNPs completely known", format = "%d") - public int knownSNPsComplete = 0; - @DataPoint(description = "Multi-allelic SNP Novelty Rate") - public String SNPNoveltyRate = "NA"; - - //TODO -- implement me - //@DataPoint(description = "Multi-allelic Indels partially known", format = "%d") - public int knownIndelsPartial = 0; - //@DataPoint(description = "Multi-allelic Indels completely known", format = "%d") - public int knownIndelsComplete = 0; - //@DataPoint(description = "Multi-allelic Indel Novelty Rate") - public String indelNoveltyRate = "NA"; - @DataPoint(description="Histogram of allele frequencies for most common SNP alternate allele") AFHistogram AFhistogramMaxSnp = new AFHistogram(); @@ -154,32 +110,22 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa return 2; } - public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1); - } + public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {} public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( eval == null || eval.isMonomorphicInSamples() ) return null; + if ( !eval.isBiallelic() ) + return null; + // update counts switch ( eval.getType() ) { case SNP: - nSNPs++; - if ( !eval.isBiallelic() ) { - nMultiSNPs++; - calculatePairwiseTiTv(eval); - calculateSNPPairwiseNovelty(eval, comp); - updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp); - } + updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp); break; case INDEL: - nIndels++; - if ( !eval.isBiallelic() ) { - nMultiIndels++; - calculateIndelPairwiseNovelty(eval, comp); - updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel); - } + updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel); break; default: throw new UserException.BadInput("Unexpected variant context type: " + eval); @@ -188,34 +134,6 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa return null; // we don't capture any interesting sites } - private void calculatePairwiseTiTv(VariantContext vc) { - for ( Allele alt : vc.getAlternateAlleles() ) { - if ( VariantContextUtils.isTransition(vc.getReference(), alt) ) - nTi++; - else - nTv++; - } - } - - private void calculateSNPPairwiseNovelty(VariantContext eval, VariantContext comp) { - if ( comp == null ) - return; - - int knownAlleles = 0; - for ( Allele alt : eval.getAlternateAlleles() ) { - if ( comp.getAlternateAlleles().contains(alt) ) - knownAlleles++; - } - - if ( knownAlleles == eval.getAlternateAlleles().size() ) - knownSNPsComplete++; - else if ( knownAlleles > 0 ) - knownSNPsPartial++; - } - - private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) { - } - private void updateAFhistogram(VariantContext vc, AFHistogram max, AFHistogram min) { final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null); @@ -233,22 +151,4 @@ public class MultiallelicSummary extends VariantEvaluator { // implements Standa for ( int i = 0; i < AFs.size() - 1; i++ ) min.update(AFs.get(i)); } - - private final String noveltyRate(final int all, final int known) { - final int novel = all - known; - final double rate = (novel / (1.0 * all)); - return all == 0 ? "NA" : String.format("%.2f", rate); - } - - public void finalizeEvaluation() { - processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci; - variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs; - processedMultiIndelRatio = (double)nMultiIndels / (double)nProcessedLoci; - variantMultiIndelRatio = (double)nMultiIndels / (double)nIndels; - - TiTvRatio = (double)nTi / (double)nTv; - - SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete); - indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete); - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java index 1a4aa1cc8..5cea0322f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java @@ -31,14 +31,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; -import java.util.*; - @Analysis(description = "Evaluation summary for multi-allelic variants") public class MultiallelicSummary extends VariantEvaluator implements StandardEval { final protected static Logger logger = Logger.getLogger(MultiallelicSummary.class); @@ -91,60 +86,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva //@DataPoint(description = "Multi-allelic Indel Novelty Rate") public String indelNoveltyRate = "NA"; - @DataPoint(description="Histogram of allele frequencies for most common SNP alternate allele") - AFHistogram AFhistogramMaxSnp = new AFHistogram(); - - @DataPoint(description="Histogram of allele frequencies for less common SNP alternate alleles") - AFHistogram AFhistogramMinSnp = new AFHistogram(); - - @DataPoint(description="Histogram of allele frequencies for most common Indel alternate allele") - AFHistogram AFhistogramMaxIndel = new AFHistogram(); - - @DataPoint(description="Histogram of allele frequencies for less common Indel alternate alleles") - AFHistogram AFhistogramMinIndel = new AFHistogram(); - - /* - * AF histogram table object - */ - static class AFHistogram implements TableType { - private Object[] rowKeys, colKeys = {"count"}; - private int[] AFhistogram; - - private static final double AFincrement = 0.01; - private static final int numBins = (int)(1.00 / AFincrement); - - public AFHistogram() { - rowKeys = initRowKeys(); - AFhistogram = new int[rowKeys.length]; - } - - public Object[] getColumnKeys() { - return colKeys; - } - - public Object[] getRowKeys() { - return rowKeys; - } - - public Object getCell(int row, int col) { - return AFhistogram[row]; - } - - private static Object[] initRowKeys() { - ArrayList keyList = new ArrayList(numBins + 1); - for ( double a = 0.00; a <= 1.01; a += AFincrement ) { - keyList.add(String.format("%.2f", a)); - } - return keyList.toArray(); - } - - public String getName() { return "AFHistTable"; } - - public void update(final double AF) { - final int bin = (int)(numBins * MathUtils.round(AF, 2)); - AFhistogram[bin]++; - } - } public void initialize(VariantEvalWalker walker) {} @@ -170,7 +111,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva nMultiSNPs++; calculatePairwiseTiTv(eval); calculateSNPPairwiseNovelty(eval, comp); - updateAFhistogram(eval, AFhistogramMaxSnp, AFhistogramMinSnp); } break; case INDEL: @@ -178,7 +118,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva if ( !eval.isBiallelic() ) { nMultiIndels++; calculateIndelPairwiseNovelty(eval, comp); - updateAFhistogram(eval, AFhistogramMaxIndel, AFhistogramMinIndel); } break; default: @@ -214,26 +153,9 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva } private void calculateIndelPairwiseNovelty(VariantContext eval, VariantContext comp) { + // TODO -- implement me } - private void updateAFhistogram(VariantContext vc, AFHistogram max, AFHistogram min) { - - final Object obj = vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY, null); - if ( obj == null || !(obj instanceof List) ) - return; - - List list = (List)obj; - ArrayList AFs = new ArrayList(list.size()); - for ( String str : list ) { - AFs.add(Double.valueOf(str)); - } - - Collections.sort(AFs); - max.update(AFs.get(AFs.size()-1)); - for ( int i = 0; i < AFs.size() - 1; i++ ) - min.update(AFs.get(i)); - } - private final String noveltyRate(final int all, final int known) { final int novel = all - known; final double rate = (novel / (1.0 * all));