Fixed long-standing bug in GenotypeConcordance module of VariantEval which caused incorrect numbers to be displayed in the concordance table. The format of the concordance table has changed. Added a concordance summary table which gives overall genotype concordance summary stats by sample. None of the VE integration tests contained genotype information so I added a comp track with genotypes to one of the tests.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3247 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e0b51d0df0
commit
e7c0ded40e
|
|
@ -11,15 +11,31 @@ import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/*
|
||||||
* The Broad Institute
|
* Copyright (c) 2010 The Broad Institute
|
||||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
*
|
||||||
* This software and its documentation are copyright 2009 by the
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
* obtaining a copy of this software and associated documentation
|
||||||
* <p/>
|
* files (the "Software"), to deal in the Software without
|
||||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
* restriction, including without limitation the rights to use,
|
||||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@Analysis(name = "Genotype Concordance", description = "Determine the genotype concordance between the genotypes in difference tracks")
|
@Analysis(name = "Genotype Concordance", description = "Determine the genotype concordance between the genotypes in difference tracks")
|
||||||
public class GenotypeConcordance extends VariantEvaluator {
|
public class GenotypeConcordance extends VariantEvaluator {
|
||||||
protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class);
|
protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class);
|
||||||
|
|
@ -32,6 +48,11 @@ public class GenotypeConcordance extends VariantEvaluator {
|
||||||
@DataPoint(name="samples", description = "the concordance statistics for each sample")
|
@DataPoint(name="samples", description = "the concordance statistics for each sample")
|
||||||
SampleStats sampleStats = null;
|
SampleStats sampleStats = null;
|
||||||
|
|
||||||
|
// a mapping from sample to stats summary
|
||||||
|
@DataPoint(name="summary", description = "the concordance statistics summary for each sample")
|
||||||
|
SampleSummaryStats sampleSummaryStats = null;
|
||||||
|
|
||||||
|
|
||||||
// two histograms of variant quality scores, for true positive and false positive calls
|
// two histograms of variant quality scores, for true positive and false positive calls
|
||||||
@DataPoint(description = "the variant quality score histograms for true positive and false positive calls")
|
@DataPoint(description = "the variant quality score histograms for true positive and false positive calls")
|
||||||
QualityScoreHistograms qualityScoreHistograms = null;
|
QualityScoreHistograms qualityScoreHistograms = null;
|
||||||
|
|
@ -250,6 +271,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
||||||
if (eval != null) {
|
if (eval != null) {
|
||||||
// initialize the concordance table
|
// initialize the concordance table
|
||||||
sampleStats = new SampleStats(eval,Genotype.Type.values().length);
|
sampleStats = new SampleStats(eval,Genotype.Type.values().length);
|
||||||
|
sampleSummaryStats = new SampleSummaryStats(eval);
|
||||||
for (final VariantContext vc : missedValidationData) {
|
for (final VariantContext vc : missedValidationData) {
|
||||||
determineStats(null, vc);
|
determineStats(null, vc);
|
||||||
}
|
}
|
||||||
|
|
@ -341,6 +363,10 @@ public class GenotypeConcordance extends VariantEvaluator {
|
||||||
if( qualityScoreHistograms != null ) {
|
if( qualityScoreHistograms != null ) {
|
||||||
qualityScoreHistograms.organizeHistogramTables();
|
qualityScoreHistograms.organizeHistogramTables();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( sampleSummaryStats != null && sampleStats != null ) {
|
||||||
|
sampleSummaryStats.generateSampleSummaryStats( sampleStats );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -351,7 +377,7 @@ class SampleStats implements TableType {
|
||||||
private final int nGenotypeTypes;
|
private final int nGenotypeTypes;
|
||||||
|
|
||||||
// sample to concordance stats object
|
// sample to concordance stats object
|
||||||
private HashMap<String, long[][]> concordanceStats = new HashMap<String, long[][]>();
|
public final HashMap<String, long[][]> concordanceStats = new HashMap<String, long[][]>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
@ -379,12 +405,12 @@ class SampleStats implements TableType {
|
||||||
* @return a list of objects, in this case strings, that are the column names
|
* @return a list of objects, in this case strings, that are the column names
|
||||||
*/
|
*/
|
||||||
public Object[] getColumnKeys() {
|
public Object[] getColumnKeys() {
|
||||||
return new String[]{"total_true_ref","n_ref/ref","%_ref/ref",
|
return new String[]{"total_true_ref","%_ref/ref","n_ref/no-call",
|
||||||
"n_ref/no-call","n_ref/het","n_ref/hom",
|
"n_ref/ref","n_ref/het","n_ref/hom",
|
||||||
"total_true_het","n_het/het","%_het/het",
|
"total_true_het","%_het/het","n_het/no-call",
|
||||||
"n_het/no-call","n_het/ref","n_het/hom",
|
"n_het/ref","n_het/het","n_het/hom",
|
||||||
"total_true_hom","n_hom/hom","%_hom/hom",
|
"total_true_hom","%_hom/hom","n_hom/no-call",
|
||||||
"n_hom/no-call","n_hom/ref","n_hom/het"};
|
"n_hom/ref","n_hom/het","n_hom/hom"};
|
||||||
}
|
}
|
||||||
|
|
||||||
public SampleStats(VariantContext vc, int nGenotypeTypes) {
|
public SampleStats(VariantContext vc, int nGenotypeTypes) {
|
||||||
|
|
@ -399,19 +425,18 @@ class SampleStats implements TableType {
|
||||||
// save some repeat work, get the total every time
|
// save some repeat work, get the total every time
|
||||||
long total = 0;
|
long total = 0;
|
||||||
Object[] rowKeys = getRowKeys();
|
Object[] rowKeys = getRowKeys();
|
||||||
for (int called = 0; called < nGenotypeTypes; called++)
|
for (int called = 0; called < nGenotypeTypes; called++) {
|
||||||
total += concordanceStats.get(rowKeys[x])[type.ordinal()][called];
|
total += concordanceStats.get(rowKeys[x])[type.ordinal()][called];
|
||||||
|
}
|
||||||
|
|
||||||
// now get the cell they're interested in
|
// now get the cell they're interested in
|
||||||
switch (y % 6) {
|
switch (y % 6) {
|
||||||
case (0): // get the total_true for this type
|
case (0): // get the total_true for this type
|
||||||
return total;
|
return total;
|
||||||
case (1):
|
case (1):
|
||||||
return concordanceStats.get(rowKeys[x])[type.ordinal()][type.ordinal()];
|
|
||||||
case (2):
|
|
||||||
return total == 0 ? 0.0 : (100.0 * (double) concordanceStats.get(rowKeys[x])[type.ordinal()][type.ordinal()] / (double) total);
|
return total == 0 ? 0.0 : (100.0 * (double) concordanceStats.get(rowKeys[x])[type.ordinal()][type.ordinal()] / (double) total);
|
||||||
default:
|
default:
|
||||||
return concordanceStats.get(rowKeys[x])[type.ordinal()][(y % 6) - 3];
|
return concordanceStats.get(rowKeys[x])[type.ordinal()][(y % 6) - 2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -419,3 +444,110 @@ class SampleStats implements TableType {
|
||||||
return "Sample Statistics";
|
return "Sample Statistics";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a table of sample names to genotype concordance summary statistics
|
||||||
|
*/
|
||||||
|
class SampleSummaryStats implements TableType {
|
||||||
|
|
||||||
|
// sample to concordance stats object
|
||||||
|
private final HashMap<String, double[]> concordanceSummary = new HashMap<String, double[]>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return one row per sample
|
||||||
|
*/
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
return concordanceSummary.keySet().toArray(new String[concordanceSummary.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the column keys
|
||||||
|
* @return a list of objects, in this case strings, that are the column names
|
||||||
|
*/
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return new String[]{"%het_called_het","%hom_called_hom","%nonref_called_nonref","calledGenotypeConcordance","calledNonRefConcordance"};
|
||||||
|
}
|
||||||
|
|
||||||
|
public SampleSummaryStats(final VariantContext vc) {
|
||||||
|
for( final String sample : vc.getSampleNames() ) {
|
||||||
|
concordanceSummary.put(sample, new double[5]); // There are five summary values to report out
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getCell(int x, int y) {
|
||||||
|
final Object[] rowKeys = getRowKeys();
|
||||||
|
return String.format("%.3f",concordanceSummary.get(rowKeys[x])[y]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the five summary stats per sample
|
||||||
|
* @param sampleStats The Map which holds concordance values per sample
|
||||||
|
*/
|
||||||
|
public void generateSampleSummaryStats( final SampleStats sampleStats ) {
|
||||||
|
for( final String sample : concordanceSummary.keySet() ) {
|
||||||
|
final long[][] stats = sampleStats.concordanceStats.get(sample);
|
||||||
|
final double[] summary = concordanceSummary.get(sample);
|
||||||
|
if( stats == null ) { throw new StingException( "SampleStats and SampleSummaryStats contain different samples! sample = " + sample ); }
|
||||||
|
|
||||||
|
long numer, denom;
|
||||||
|
|
||||||
|
// Summary 0: % het called as het
|
||||||
|
numer = stats[Genotype.Type.HET.ordinal()][Genotype.Type.HET.ordinal()];
|
||||||
|
denom = 0L;
|
||||||
|
for(final Genotype.Type type : Genotype.Type.values()) {
|
||||||
|
denom += stats[Genotype.Type.HET.ordinal()][type.ordinal()];
|
||||||
|
}
|
||||||
|
summary[0] = ( denom != 0L ? 100.0 * ( ((double)numer) / ((double)denom) ) : 0.0 );
|
||||||
|
|
||||||
|
// Summary 1: % homVar called as homVar
|
||||||
|
numer = stats[Genotype.Type.HOM_VAR.ordinal()][Genotype.Type.HOM_VAR.ordinal()];
|
||||||
|
denom = 0L;
|
||||||
|
for(final Genotype.Type type : Genotype.Type.values()) {
|
||||||
|
denom += stats[Genotype.Type.HOM_VAR.ordinal()][type.ordinal()];
|
||||||
|
}
|
||||||
|
summary[1] = ( denom != 0L ? 100.0 * ( ((double)numer) / ((double)denom) ) : 0.0 );
|
||||||
|
|
||||||
|
// Summary 2: % non-ref called as non-ref
|
||||||
|
numer = 0L;
|
||||||
|
numer += stats[Genotype.Type.HOM_VAR.ordinal()][Genotype.Type.HOM_VAR.ordinal()];
|
||||||
|
numer += stats[Genotype.Type.HOM_VAR.ordinal()][Genotype.Type.HET.ordinal()];
|
||||||
|
numer += stats[Genotype.Type.HET.ordinal()][Genotype.Type.HOM_VAR.ordinal()];
|
||||||
|
numer += stats[Genotype.Type.HET.ordinal()][Genotype.Type.HET.ordinal()];
|
||||||
|
denom = 0L;
|
||||||
|
for(final Genotype.Type type : Genotype.Type.values()) {
|
||||||
|
denom += stats[Genotype.Type.HOM_VAR.ordinal()][type.ordinal()];
|
||||||
|
denom += stats[Genotype.Type.HET.ordinal()][type.ordinal()];
|
||||||
|
}
|
||||||
|
summary[2] = ( denom != 0L ? 100.0 * ( ((double)numer) / ((double)denom) ) : 0.0 );
|
||||||
|
|
||||||
|
// Summary 3: genotype concordance of sites called in eval track
|
||||||
|
numer = 0L;
|
||||||
|
numer += stats[Genotype.Type.HOM_VAR.ordinal()][Genotype.Type.HOM_VAR.ordinal()];
|
||||||
|
numer += stats[Genotype.Type.HET.ordinal()][Genotype.Type.HET.ordinal()];
|
||||||
|
numer += stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()];
|
||||||
|
denom = 0L;
|
||||||
|
for(final Genotype.Type type : new Genotype.Type[]{Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF} ) { // excluding no calls here
|
||||||
|
denom += stats[Genotype.Type.HOM_VAR.ordinal()][type.ordinal()];
|
||||||
|
denom += stats[Genotype.Type.HET.ordinal()][type.ordinal()];
|
||||||
|
denom += stats[Genotype.Type.HOM_REF.ordinal()][type.ordinal()];
|
||||||
|
}
|
||||||
|
summary[3] = ( denom != 0L ? 100.0 * ( ((double)numer) / ((double)denom) ) : 0.0 );
|
||||||
|
|
||||||
|
// Summary 4: genotype concordance of sites called non-ref in eval track
|
||||||
|
numer = 0L;
|
||||||
|
numer += stats[Genotype.Type.HOM_VAR.ordinal()][Genotype.Type.HOM_VAR.ordinal()];
|
||||||
|
numer += stats[Genotype.Type.HET.ordinal()][Genotype.Type.HET.ordinal()];
|
||||||
|
denom = 0L;
|
||||||
|
for(final Genotype.Type type : new Genotype.Type[]{Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF} ) { // excluding no calls here
|
||||||
|
denom += stats[Genotype.Type.HOM_VAR.ordinal()][type.ordinal()];
|
||||||
|
denom += stats[Genotype.Type.HET.ordinal()][type.ordinal()];
|
||||||
|
}
|
||||||
|
summary[4] = ( denom != 0L ? 100.0 * ( ((double)numer) / ((double)denom) ) : 0.0 );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Sample Summary Statistics";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,13 +13,14 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
private static String root = cmdRoot +
|
private static String root = cmdRoot +
|
||||||
" -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
" -D " + GATKDataLocation + "dbsnp_129_b36.rod" +
|
||||||
" -B eval,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf";
|
" -B eval,VCF," + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
|
||||||
|
" -B comp_genotypes,VCF," + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testVESimple() {
|
public void testVESimple() {
|
||||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||||
expectations.put("-L 1:1-10,000,000", "bb96e002225df21a84ba7c72613cc67a");
|
expectations.put("-L 1:1-10,000,000", "b28ffcff420177d9b73aa726e260fb34");
|
||||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "7427803b4121665e130c76191f862231");
|
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "749d53687497d7939713e9ce586642a4");
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||||
String extraArgs = entry.getKey();
|
String extraArgs = entry.getKey();
|
||||||
|
|
@ -39,10 +40,11 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
|
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
|
||||||
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||||
|
|
||||||
String matchingMD5 = "920940d4006d5d140183f3ba35cafe00";
|
|
||||||
|
String matchingMD5 = "3abf37ada16619f4b3f39cb7ecad497f";
|
||||||
expectations.put("", matchingMD5);
|
expectations.put("", matchingMD5);
|
||||||
expectations.put(" -known comp_hapmap -known dbsnp", matchingMD5);
|
expectations.put(" -known comp_hapmap -known dbsnp", matchingMD5);
|
||||||
expectations.put(" -known comp_hapmap", "ae9ab5a1556e773b3b1ba80453b14eda");
|
expectations.put(" -known comp_hapmap", "75026dbdad38f74ac697a7c102e64db4");
|
||||||
|
|
||||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||||
String extraArgs2 = entry.getKey();
|
String extraArgs2 = entry.getKey();
|
||||||
|
|
@ -60,7 +62,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
||||||
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
|
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
|
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
|
||||||
2,
|
2,
|
||||||
Arrays.asList("40fca380c2e768cfff5febefc4a73aa0", "a3ce1d70d8ae3874807e9d61994d42af"));
|
Arrays.asList("06cdbe7f94990dfe61b5e3ec03b49151", "b4a42c90318adc88361691ece50426f2"));
|
||||||
executeTest("testVEWriteVCF", spec);
|
executeTest("testVEWriteVCF", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue