From 60c227d67ffbb8bd17ee073240fc464a5caf30ef Mon Sep 17 00:00:00 2001 From: rpoplin Date: Tue, 6 Apr 2010 15:19:27 +0000 Subject: [PATCH] Added new VE2 module to create a plot of titv ratio by variant quality score git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3125 348d0f76-0448-11de-a6fe-93d51630548a --- .../varianteval2/GenotypeConcordance.java | 5 +- .../varianteval2/VariantQualityScore.java | 169 ++++++++++++++++++ .../VariantEval2IntegrationTest.java | 10 +- 3 files changed, 178 insertions(+), 6 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantQualityScore.java diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java index 13e859045..d074f07a8 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java @@ -33,6 +33,7 @@ public class GenotypeConcordance extends VariantEvaluator { @DataPoint(name="samples", description = "the concordance statistics for each sample") SampleStats sampleStats = null; + // two histograms of variant quality scores, for true positive and false positive calls @DataPoint(description = "the variant quality score histograms for true positive and false positive calls") QualityScoreHistograms qualityScoreHistograms = null; @@ -314,7 +315,9 @@ public class GenotypeConcordance extends VariantEvaluator { } public void finalizeEvaluation() { - qualityScoreHistograms.organizeHistogramTables(); + if( qualityScoreHistograms!=null ) { + qualityScoreHistograms.organizeHistogramTables(); + } } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantQualityScore.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantQualityScore.java new file mode 100755 index 000000000..a89bae98b --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantQualityScore.java @@ -0,0 +1,169 @@ +package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.playground.utils.report.tags.Analysis; +import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; +import org.broadinstitute.sting.playground.utils.report.utils.TableType; + +import java.util.ArrayList; + +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author rpoplin + * @since Apr 6, 2010 + */ + +@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score") +public class VariantQualityScore extends VariantEvaluator { + + // a mapping from quality score histogram bin to Ti/Tv ratio + @DataPoint(name="TiTv by Quality", description = "the Ti/Tv ratio broken out by variant quality") + TiTvStats titvStats = null; + + class TiTvStats implements TableType { + final int NUM_BINS = 20; + final ArrayList qualities = new ArrayList(); // An ArrayList holds all the qualities until we are able to bin them appropriately + final ArrayList isTransition = new ArrayList(); + final long transitionByQuality[] = new long[NUM_BINS]; + final long transversionByQuality[] = new long[NUM_BINS]; + final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out + + public Object[] getRowKeys() { + return new String[]{"sample"}; + } + + public Object[] getColumnKeys() { + final String columnKeys[] = new String[NUM_BINS]; + for( int iii = 0; iii < NUM_BINS; iii++ ) { + columnKeys[iii] = "titvBin" + iii; + } + return columnKeys; + } + + public String getName() { + return "TiTvStats"; + } + + public String getCell(int x, int y) { + return String.valueOf(titvByQuality[y]); + } + + public String toString() { + String returnString = ""; + // output the ti/tv array + returnString += "titvByQuality: "; + for( int iii = 0; iii < NUM_BINS; iii++ ) { + returnString += titvByQuality[iii] + " "; + } + return returnString; + } + + public void incrValue( final double qual, final boolean _isTransition ) { + qualities.add(qual); + isTransition.add(_isTransition); + } + + public void organizeTiTvTables() { + for( int iii = 0; iii < NUM_BINS; iii++ ) { + transitionByQuality[iii] = 0L; + transversionByQuality[iii] = 0L; + titvByQuality[iii] = 0.0; + } + + double maxQual = 0.0; + + // Calculate the maximum quality score in order to normalize and histogram + for( final Double qual : qualities ) { + if( qual > maxQual ) { + maxQual = qual; + } + } + + final double binSize = maxQual / ((double) (NUM_BINS-1)); + + int jjj = 0; + for( final Double qual : qualities ) { + final int index = (int)Math.floor( qual / binSize ); + if(isTransition.get(jjj)) { + transitionByQuality[index]++; + } else { + transversionByQuality[index]++; + } + jjj++; + } + + for( int iii = 0; iii < NUM_BINS; iii++ ) { + if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio + titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]); + } else { + titvByQuality[iii] = 0.0; + } + } + + } + } + + public VariantQualityScore(VariantEval2Walker parent) { + // don't do anything + } + + public String getName() { + return "VariantQualityScore"; + } + + public int getComparisonOrder() { + return 1; // we only need to see each eval track + } + + public boolean enabled() { + return true; + } + + public String toString() { + return getName(); + } + + public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + final String interesting = null; + + if(eval != null) { + if( titvStats == null ) { titvStats = new TiTvStats(); } + titvStats.incrValue(eval.getPhredScaledQual(), eval.isTransition()); + } + + return interesting; // This module doesn't capture any interesting sites, so return null + } + + public void finalizeEvaluation() { + if( titvStats!=null ) { + titvStats.organizeTiTvTables(); + } + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java index f5957cfb6..a62160a58 100755 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java @@ -18,8 +18,8 @@ public class VariantEval2IntegrationTest extends WalkerTest { @Test public void testVE2Simple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "cb65879db9d00132484fbe7e7c0460b9"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "7126f47b9d87dc4e1e0f8dec5bab49c9"); + expectations.put("-L 1:1-10,000,000", "c304b257004f9d11cbcd89725eb89319"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "59cce874ed647fce22789b82779c4f13"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -39,10 +39,10 @@ public class VariantEval2IntegrationTest extends WalkerTest { " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String eqMD5s = "a3eeb0ba5bde0c57a290c917a87ec900"; // next two examples should be the same! + String eqMD5s = "cf20ea2b9c6b173311b735f6d4ae72e6"; // next two examples should be the same! expectations.put("", eqMD5s); expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s); - expectations.put(" -known comp_hapmap", "575faa090abf1bfdc8394dca46f9b7a2"); + expectations.put(" -known comp_hapmap", "f43630aa98764aecb867b175014879e1"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs2 = entry.getKey(); @@ -60,7 +60,7 @@ public class VariantEval2IntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("2801558409c4ce3a5484b8e1b3a49e0d", "a3ce1d70d8ae3874807e9d61994d42af")); + Arrays.asList("abc2a36c0375386f731461da5e7e2104", "a3ce1d70d8ae3874807e9d61994d42af")); executeTest("testVE2WriteVCF", spec); } }