Added new VE2 module to create a plot of titv ratio by variant quality score
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3125 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3530ef5a41
commit
60c227d67f
|
|
@ -33,6 +33,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
@DataPoint(name="samples", description = "the concordance statistics for each sample")
|
||||
SampleStats sampleStats = null;
|
||||
|
||||
// two histograms of variant quality scores, for true positive and false positive calls
|
||||
@DataPoint(description = "the variant quality score histograms for true positive and false positive calls")
|
||||
QualityScoreHistograms qualityScoreHistograms = null;
|
||||
|
||||
|
|
@ -314,7 +315,9 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
qualityScoreHistograms.organizeHistogramTables();
|
||||
if( qualityScoreHistograms!=null ) {
|
||||
qualityScoreHistograms.organizeHistogramTables();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,169 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @author rpoplin
|
||||
* @since Apr 6, 2010
|
||||
*/
|
||||
|
||||
@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
|
||||
public class VariantQualityScore extends VariantEvaluator {
|
||||
|
||||
// a mapping from quality score histogram bin to Ti/Tv ratio
|
||||
@DataPoint(name="TiTv by Quality", description = "the Ti/Tv ratio broken out by variant quality")
|
||||
TiTvStats titvStats = null;
|
||||
|
||||
class TiTvStats implements TableType {
|
||||
final int NUM_BINS = 20;
|
||||
final ArrayList<Double> qualities = new ArrayList<Double>(); // An ArrayList holds all the qualities until we are able to bin them appropriately
|
||||
final ArrayList<Boolean> isTransition = new ArrayList<Boolean>();
|
||||
final long transitionByQuality[] = new long[NUM_BINS];
|
||||
final long transversionByQuality[] = new long[NUM_BINS];
|
||||
final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
|
||||
|
||||
public Object[] getRowKeys() {
|
||||
return new String[]{"sample"};
|
||||
}
|
||||
|
||||
public Object[] getColumnKeys() {
|
||||
final String columnKeys[] = new String[NUM_BINS];
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
columnKeys[iii] = "titvBin" + iii;
|
||||
}
|
||||
return columnKeys;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "TiTvStats";
|
||||
}
|
||||
|
||||
public String getCell(int x, int y) {
|
||||
return String.valueOf(titvByQuality[y]);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
String returnString = "";
|
||||
// output the ti/tv array
|
||||
returnString += "titvByQuality: ";
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
returnString += titvByQuality[iii] + " ";
|
||||
}
|
||||
return returnString;
|
||||
}
|
||||
|
||||
public void incrValue( final double qual, final boolean _isTransition ) {
|
||||
qualities.add(qual);
|
||||
isTransition.add(_isTransition);
|
||||
}
|
||||
|
||||
public void organizeTiTvTables() {
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
transitionByQuality[iii] = 0L;
|
||||
transversionByQuality[iii] = 0L;
|
||||
titvByQuality[iii] = 0.0;
|
||||
}
|
||||
|
||||
double maxQual = 0.0;
|
||||
|
||||
// Calculate the maximum quality score in order to normalize and histogram
|
||||
for( final Double qual : qualities ) {
|
||||
if( qual > maxQual ) {
|
||||
maxQual = qual;
|
||||
}
|
||||
}
|
||||
|
||||
final double binSize = maxQual / ((double) (NUM_BINS-1));
|
||||
|
||||
int jjj = 0;
|
||||
for( final Double qual : qualities ) {
|
||||
final int index = (int)Math.floor( qual / binSize );
|
||||
if(isTransition.get(jjj)) {
|
||||
transitionByQuality[index]++;
|
||||
} else {
|
||||
transversionByQuality[index]++;
|
||||
}
|
||||
jjj++;
|
||||
}
|
||||
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
|
||||
titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
|
||||
} else {
|
||||
titvByQuality[iii] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
public VariantQualityScore(VariantEval2Walker parent) {
|
||||
// don't do anything
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "VariantQualityScore";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 1; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return getName();
|
||||
}
|
||||
|
||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
final String interesting = null;
|
||||
|
||||
if(eval != null) {
|
||||
if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||
titvStats.incrValue(eval.getPhredScaledQual(), eval.isTransition());
|
||||
}
|
||||
|
||||
return interesting; // This module doesn't capture any interesting sites, so return null
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
if( titvStats!=null ) {
|
||||
titvStats.organizeTiTvTables();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -18,8 +18,8 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testVE2Simple() {
|
||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
expectations.put("-L 1:1-10,000,000", "cb65879db9d00132484fbe7e7c0460b9");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "7126f47b9d87dc4e1e0f8dec5bab49c9");
|
||||
expectations.put("-L 1:1-10,000,000", "c304b257004f9d11cbcd89725eb89319");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "59cce874ed647fce22789b82779c4f13");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
|
|
@ -39,10 +39,10 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
|||
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
|
||||
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||
|
||||
String eqMD5s = "a3eeb0ba5bde0c57a290c917a87ec900"; // next two examples should be the same!
|
||||
String eqMD5s = "cf20ea2b9c6b173311b735f6d4ae72e6"; // next two examples should be the same!
|
||||
expectations.put("", eqMD5s);
|
||||
expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
|
||||
expectations.put(" -known comp_hapmap", "575faa090abf1bfdc8394dca46f9b7a2");
|
||||
expectations.put(" -known comp_hapmap", "f43630aa98764aecb867b175014879e1");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs2 = entry.getKey();
|
||||
|
|
@ -60,7 +60,7 @@ public class VariantEval2IntegrationTest extends WalkerTest {
|
|||
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
|
||||
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
|
||||
2,
|
||||
Arrays.asList("2801558409c4ce3a5484b8e1b3a49e0d", "a3ce1d70d8ae3874807e9d61994d42af"));
|
||||
Arrays.asList("abc2a36c0375386f731461da5e7e2104", "a3ce1d70d8ae3874807e9d61994d42af"));
|
||||
executeTest("testVE2WriteVCF", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue