diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java index ea01ceef2..d93d44fc6 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java @@ -86,8 +86,8 @@ public class GenotypeConcordance extends VariantEvaluator { class QualityScoreHistograms implements TableType { final int NUM_BINS = 20; - final ArrayList truePositiveQualities = new ArrayList(); // An ArrayList holds all the qualities until we are able to bin them appropriately - final ArrayList falsePositiveQualities = new ArrayList(); + final HashMap truePositiveQualityScoreMap = new HashMap(); // A HashMap holds all the quality scores until we are able to bin them appropriately + final HashMap falsePositiveQualityScoreMap = new HashMap(); final int truePositiveHist[] = new int[NUM_BINS]; // the final histograms that get reported out final int falsePositiveHist[] = new int[NUM_BINS]; final String[] rowKeys = new String[]{"true_positive_hist", "false_positive_hist"}; @@ -133,10 +133,17 @@ public class GenotypeConcordance extends VariantEvaluator { } public void incrValue( final double qual, final boolean isTruePositiveCall ) { + HashMap qualScoreMap; if( isTruePositiveCall ) { - truePositiveQualities.add( qual ); + qualScoreMap = truePositiveQualityScoreMap; } else { - falsePositiveQualities.add( qual ); + qualScoreMap = falsePositiveQualityScoreMap; + } + final Integer qualKey = Math.round((float) qual); + if( qualScoreMap.containsKey(qualKey) ) { + qualScoreMap.put(qualKey, qualScoreMap.get(qualKey) + 1); + } else { + qualScoreMap.put(qualKey, 1); } } @@ -146,27 +153,33 @@ public class GenotypeConcordance extends VariantEvaluator { falsePositiveHist[iii] = 0; } - double maxQual = 0.0; + int maxQual = 0; // Calculate the maximum quality score for both TP and FP calls in order to normalize and histogram - for( final Double qual : truePositiveQualities ) { + for( final Integer qual : truePositiveQualityScoreMap.keySet()) { if( qual > maxQual ) { maxQual = qual; } } - for( final Double qual : falsePositiveQualities ) { + for( final Integer qual : falsePositiveQualityScoreMap.keySet()) { if( qual > maxQual ) { maxQual = qual; } } - final double binSize = maxQual / ((double) (NUM_BINS-1)); - - for( final Double qual : truePositiveQualities ) { - truePositiveHist[ (int)Math.floor( qual / binSize ) ]++; + final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1)); //BUGBUG: should be normalized max to min, not max to 0 + + for( final Integer qual : truePositiveQualityScoreMap.keySet()) { + final int index = (int)Math.floor( ((double)qual) / binSize ); + if(index >= 0) { //BUGBUG: problem when maxQual is zero? + truePositiveHist[ index ] += truePositiveQualityScoreMap.get(qual); + } } - for( final Double qual : falsePositiveQualities ) { - falsePositiveHist[ (int)Math.floor( qual / binSize ) ]++; + for( final Integer qual : falsePositiveQualityScoreMap.keySet()) { + final int index = (int)Math.floor( ((double)qual) / binSize ); + if(index >= 0) { + falsePositiveHist[ index ] += falsePositiveQualityScoreMap.get(qual); + } } } } @@ -326,7 +339,7 @@ public class GenotypeConcordance extends VariantEvaluator { } public void finalizeEvaluation() { - if( qualityScoreHistograms!=null ) { + if( qualityScoreHistograms != null ) { qualityScoreHistograms.organizeHistogramTables(); } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantQualityScore.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantQualityScore.java index 9d7bbad49..7e8ede970 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantQualityScore.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantQualityScore.java @@ -2,11 +2,13 @@ package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.playground.utils.report.tags.Analysis; import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; import org.broadinstitute.sting.playground.utils.report.utils.TableType; +import org.broadinstitute.sting.utils.Pair; import java.util.ArrayList; import java.util.HashMap; @@ -48,13 +50,12 @@ public class VariantQualityScore extends VariantEvaluator { @DataPoint(name="TiTv by Quality", description = "the Ti/Tv ratio broken out by variant quality") TiTvStats titvStats = null; - //@DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count") - //AlleleCountStats alleleCountStats = null; + @DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count") + AlleleCountStats alleleCountStats = null; class TiTvStats implements TableType { final int NUM_BINS = 20; - final ArrayList qualities = new ArrayList(); // An ArrayList holds all the qualities until we are able to bin them appropriately - final ArrayList isTransition = new ArrayList(); + final HashMap> qualByIsTransition = new HashMap>(); // A hashMap holds all the qualities until we are able to bin them appropriately final long transitionByQuality[] = new long[NUM_BINS]; final long transversionByQuality[] = new long[NUM_BINS]; final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out @@ -89,9 +90,17 @@ public class VariantQualityScore extends VariantEvaluator { return returnString; } - public void incrValue( final double qual, final boolean _isTransition ) { - qualities.add(qual); - isTransition.add(_isTransition); + public void incrValue( final double qual, final boolean isTransition ) { + final Integer qualKey = Math.round((float) qual); + final long numTransition = (isTransition ? 1L : 0L); + final long numTransversion = (isTransition ? 0L : 1L); + if( qualByIsTransition.containsKey(qualKey) ) { + Pair transitionPair = qualByIsTransition.get(qualKey); + transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion); + qualByIsTransition.put(qualKey, transitionPair); + } else { + qualByIsTransition.put(qualKey, new Pair(numTransition,numTransversion)); + } } public void organizeTiTvTables() { @@ -101,26 +110,24 @@ public class VariantQualityScore extends VariantEvaluator { titvByQuality[iii] = 0.0; } - double maxQual = 0.0; + int maxQual = 0; // Calculate the maximum quality score in order to normalize and histogram - for( final Double qual : qualities ) { + for( final Integer qual : qualByIsTransition.keySet() ) { if( qual > maxQual ) { maxQual = qual; } } - final double binSize = maxQual / ((double) (NUM_BINS-1)); + final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1)); - int jjj = 0; - for( final Double qual : qualities ) { - final int index = (int)Math.floor( qual / binSize ); - if(isTransition.get(jjj)) { - transitionByQuality[index]++; - } else { - transversionByQuality[index]++; + for( final Integer qual : qualByIsTransition.keySet() ) { + final int index = (int)Math.floor( ((double) qual) / binSize ); + if( index >= 0 ) { // BUGBUG: why is there overflow here? + Pair transitionPair = qualByIsTransition.get(qual); + transitionByQuality[index] += transitionPair.getFirst(); + transversionByQuality[index] += transitionPair.getSecond(); } - jjj++; } for( int iii = 0; iii < NUM_BINS; iii++ ) { @@ -134,89 +141,71 @@ public class VariantQualityScore extends VariantEvaluator { } } - /* class AlleleCountStats implements TableType { final HashMap> qualityListMap = new HashMap>(); final HashMap qualityMap = new HashMap(); - public Object[] getRowKeys() { - return new String[]{"sample"}; + public Object[] getRowKeys() { + final int NUM_BINS = qualityListMap.keySet().size(); + final String rowKeys[] = new String[NUM_BINS]; + int iii = 0; + for( final Integer key : qualityListMap.keySet() ) { + rowKeys[iii] = "AC" + key; + iii++; + } + return rowKeys; + } public Object[] getColumnKeys() { - final int NUM_BINS = qualityListMap.keySet().size(); - final String columnKeys[] = new String[NUM_BINS]; - int iii = 0; - for( final Integer key : qualityListMap.keySet() ) { - columnKeys[iii] = "AC" + key; - iii++; - } - return columnKeys; + return new String[]{"alleleCount","avgQual"}; } public String getName() { - return "TiTvStats"; + return "AlleleCountStats"; } public String getCell(int x, int y) { - return String.valueOf(titvByQuality[y]); + int iii = 0; + for( final Integer key : qualityListMap.keySet() ) { + if(iii == x) { + if(y == 0) { return String.valueOf(key); } + else { return String.valueOf(qualityMap.get(key)); } + } + iii++; + } + return null; } public String toString() { String returnString = ""; - // output the ti/tv array - returnString += "titvByQuality: "; - for( int iii = 0; iii < NUM_BINS; iii++ ) { - returnString += titvByQuality[iii] + " "; - } + // output the quality map + returnString += "AlleleCountStats: "; + //for( int iii = 0; iii < NUM_BINS; iii++ ) { + // returnString += titvByQuality[iii] + " "; + //} return returnString; } - public void incrValue( final double qual, final boolean _isTransition ) { - qualities.add(qual); - isTransition.add(_isTransition); + public void incrValue( final double qual, final int alleleCount ) { + ArrayList list = qualityListMap.get(alleleCount); + if(list==null) { list = new ArrayList(); } + list.add(qual); + qualityListMap.put(alleleCount, list); } - public void organizeTiTvTables() { - for( int iii = 0; iii < NUM_BINS; iii++ ) { - transitionByQuality[iii] = 0L; - transversionByQuality[iii] = 0L; - titvByQuality[iii] = 0.0; - } - - double maxQual = 0.0; - - // Calculate the maximum quality score in order to normalize and histogram - for( final Double qual : qualities ) { - if( qual > maxQual ) { - maxQual = qual; + public void organizeAlleleCountTables() { + for( final Integer key : qualityListMap.keySet() ) { + final ArrayList list = qualityListMap.get(key); + double meanQual = 0.0; + final double numQuals = (double)list.size(); + for( Double qual : list ) { + meanQual += qual / numQuals; } + qualityMap.put(key, meanQual); } - - final double binSize = maxQual / ((double) (NUM_BINS-1)); - - int jjj = 0; - for( final Double qual : qualities ) { - final int index = (int)Math.floor( qual / binSize ); - if(isTransition.get(jjj)) { - transitionByQuality[index]++; - } else { - transversionByQuality[index]++; - } - jjj++; - } - - for( int iii = 0; iii < NUM_BINS; iii++ ) { - if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio - titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]); - } else { - titvByQuality[iii] = 0.0; - } - } - } } - */ public VariantQualityScore(VariantEval2Walker parent) { // don't do anything @@ -241,17 +230,27 @@ public class VariantQualityScore extends VariantEvaluator { public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { final String interesting = null; - if(eval != null) { + if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) if( titvStats == null ) { titvStats = new TiTvStats(); } titvStats.incrValue(eval.getPhredScaledQual(), eval.isTransition()); + + if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); } + int alternateAlleleCount = 0; + for (final Allele a : eval.getAlternateAlleles()) { + alternateAlleleCount += eval.getChromosomeCount(a); + } + alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount); } return interesting; // This module doesn't capture any interesting sites, so return null } public void finalizeEvaluation() { - if( titvStats!=null ) { + if( titvStats != null ) { titvStats.organizeTiTvTables(); } + if( alleleCountStats != null ) { + alleleCountStats.organizeAlleleCountTables(); + } } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java index 299cbad85..f26db95c6 100755 --- a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2IntegrationTest.java @@ -18,8 +18,8 @@ public class VariantEval2IntegrationTest extends WalkerTest { @Test public void testVE2Simple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "32d771b62dadc1c164f19bf3a1efc018"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "de42a239847e3466f4526e4781710ce7"); + expectations.put("-L 1:1-10,000,000", "44fca2e43dade85ca16fa9e323e4cc9b"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "6910a9a49625a8df2ad797d751be9ed6"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -39,10 +39,10 @@ public class VariantEval2IntegrationTest extends WalkerTest { " -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" + " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String eqMD5s = "0e961437bec3d4ad28b1511c0c45d13a"; // next two examples should be the same! + String eqMD5s = "a525597a34cd3fa84c75263dbd026acb"; // next two examples should be the same! expectations.put("", eqMD5s); expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s); - expectations.put(" -known comp_hapmap", "139b10dc0ea968dc035798492ef4ef97"); + expectations.put(" -known comp_hapmap", "35feda6c361408755e7f1dae245462d7"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs2 = entry.getKey(); @@ -60,7 +60,7 @@ public class VariantEval2IntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("3d4de331f90ef49792d3da3ee9509edc", "a3ce1d70d8ae3874807e9d61994d42af")); + Arrays.asList("0d2ac5ea23f8cd3237836080c722711b", "a3ce1d70d8ae3874807e9d61994d42af")); executeTest("testVE2WriteVCF", spec); } }