Added new VE2 module to create a plot of titv ratio by variant quality score

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3125 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-04-06 15:19:27 +00:00
parent 3530ef5a41
commit 60c227d67f
3 changed files with 178 additions and 6 deletions

View File

@ -33,6 +33,7 @@ public class GenotypeConcordance extends VariantEvaluator {
@DataPoint(name="samples", description = "the concordance statistics for each sample")
SampleStats sampleStats = null;
// two histograms of variant quality scores, for true positive and false positive calls
@DataPoint(description = "the variant quality score histograms for true positive and false positive calls")
QualityScoreHistograms qualityScoreHistograms = null;
@ -314,7 +315,9 @@ public class GenotypeConcordance extends VariantEvaluator {
}
public void finalizeEvaluation() {
qualityScoreHistograms.organizeHistogramTables();
if( qualityScoreHistograms!=null ) {
qualityScoreHistograms.organizeHistogramTables();
}
}
}

View File

@ -0,0 +1,169 @@
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
import java.util.ArrayList;
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* @author rpoplin
* @since Apr 6, 2010
*/
@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
public class VariantQualityScore extends VariantEvaluator {
// a mapping from quality score histogram bin to Ti/Tv ratio
@DataPoint(name="TiTv by Quality", description = "the Ti/Tv ratio broken out by variant quality")
TiTvStats titvStats = null;
class TiTvStats implements TableType {
final int NUM_BINS = 20;
final ArrayList<Double> qualities = new ArrayList<Double>(); // An ArrayList holds all the qualities until we are able to bin them appropriately
final ArrayList<Boolean> isTransition = new ArrayList<Boolean>();
final long transitionByQuality[] = new long[NUM_BINS];
final long transversionByQuality[] = new long[NUM_BINS];
final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
public Object[] getRowKeys() {
return new String[]{"sample"};
}
public Object[] getColumnKeys() {
final String columnKeys[] = new String[NUM_BINS];
for( int iii = 0; iii < NUM_BINS; iii++ ) {
columnKeys[iii] = "titvBin" + iii;
}
return columnKeys;
}
public String getName() {
return "TiTvStats";
}
public String getCell(int x, int y) {
return String.valueOf(titvByQuality[y]);
}
public String toString() {
String returnString = "";
// output the ti/tv array
returnString += "titvByQuality: ";
for( int iii = 0; iii < NUM_BINS; iii++ ) {
returnString += titvByQuality[iii] + " ";
}
return returnString;
}
public void incrValue( final double qual, final boolean _isTransition ) {
qualities.add(qual);
isTransition.add(_isTransition);
}
public void organizeTiTvTables() {
for( int iii = 0; iii < NUM_BINS; iii++ ) {
transitionByQuality[iii] = 0L;
transversionByQuality[iii] = 0L;
titvByQuality[iii] = 0.0;
}
double maxQual = 0.0;
// Calculate the maximum quality score in order to normalize and histogram
for( final Double qual : qualities ) {
if( qual > maxQual ) {
maxQual = qual;
}
}
final double binSize = maxQual / ((double) (NUM_BINS-1));
int jjj = 0;
for( final Double qual : qualities ) {
final int index = (int)Math.floor( qual / binSize );
if(isTransition.get(jjj)) {
transitionByQuality[index]++;
} else {
transversionByQuality[index]++;
}
jjj++;
}
for( int iii = 0; iii < NUM_BINS; iii++ ) {
if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
} else {
titvByQuality[iii] = 0.0;
}
}
}
}
public VariantQualityScore(VariantEval2Walker parent) {
// don't do anything
}
public String getName() {
return "VariantQualityScore";
}
public int getComparisonOrder() {
return 1; // we only need to see each eval track
}
public boolean enabled() {
return true;
}
public String toString() {
return getName();
}
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
final String interesting = null;
if(eval != null) {
if( titvStats == null ) { titvStats = new TiTvStats(); }
titvStats.incrValue(eval.getPhredScaledQual(), eval.isTransition());
}
return interesting; // This module doesn't capture any interesting sites, so return null
}
public void finalizeEvaluation() {
if( titvStats!=null ) {
titvStats.organizeTiTvTables();
}
}
}

View File

@ -18,8 +18,8 @@ public class VariantEval2IntegrationTest extends WalkerTest {
@Test
public void testVE2Simple() {
HashMap<String, String> expectations = new HashMap<String, String>();
expectations.put("-L 1:1-10,000,000", "cb65879db9d00132484fbe7e7c0460b9");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "7126f47b9d87dc4e1e0f8dec5bab49c9");
expectations.put("-L 1:1-10,000,000", "c304b257004f9d11cbcd89725eb89319");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "59cce874ed647fce22789b82779c4f13");
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs = entry.getKey();
@ -39,10 +39,10 @@ public class VariantEval2IntegrationTest extends WalkerTest {
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
String eqMD5s = "a3eeb0ba5bde0c57a290c917a87ec900"; // next two examples should be the same!
String eqMD5s = "cf20ea2b9c6b173311b735f6d4ae72e6"; // next two examples should be the same!
expectations.put("", eqMD5s);
expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
expectations.put(" -known comp_hapmap", "575faa090abf1bfdc8394dca46f9b7a2");
expectations.put(" -known comp_hapmap", "f43630aa98764aecb867b175014879e1");
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs2 = entry.getKey();
@ -60,7 +60,7 @@ public class VariantEval2IntegrationTest extends WalkerTest {
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
2,
Arrays.asList("2801558409c4ce3a5484b8e1b3a49e0d", "a3ce1d70d8ae3874807e9d61994d42af"));
Arrays.asList("abc2a36c0375386f731461da5e7e2104", "a3ce1d70d8ae3874807e9d61994d42af"));
executeTest("testVE2WriteVCF", spec);
}
}