diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java index e43acca89..5a639ff89 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java @@ -57,7 +57,7 @@ public class GenotypeConcordance extends VariantEvaluator { @DataPoint(description = "the variant quality score histograms for true positive and false positive calls") QualityScoreHistograms qualityScoreHistograms = null; - private static final int MAX_MISSED_VALIDATION_DATA = 10000; + private static final int MAX_MISSED_VALIDATION_DATA = 100; class FrequencyStats implements TableType { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/SimpleMetricsByAC.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/SimpleMetricsByAC.java new file mode 100755 index 000000000..f425869fa --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/SimpleMetricsByAC.java @@ -0,0 +1,161 @@ +package org.broadinstitute.sting.gatk.walkers.varianteval; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.playground.utils.report.tags.Analysis; +import org.broadinstitute.sting.playground.utils.report.tags.DataPoint; +import org.broadinstitute.sting.playground.utils.report.utils.TableType; +import org.broadinstitute.sting.utils.StingException; +import java.util.ArrayList; + +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * @author depristo + * @since Apr 11, 2010 + */ + +@Analysis(name = "Quality Metrics by allele count", description = "Shows various stats binned by allele count") +public class SimpleMetricsByAC extends VariantEvaluator { + // a mapping from quality score histogram bin to Ti/Tv ratio + @DataPoint(name="TiTv by AC", description = "TiTv by allele count") + MetricsByAc metrics = null; + + //@DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count") + //AlleleCountStats alleleCountStats = null; + + private final static Object[] METRIC_COLUMNS = {"AC", "nTi", "nTv", "n", "Ti/Tv"}; + + class MetricsAtAC { + public int ac = -1, nTi = 0, nTv = 0; + + public MetricsAtAC(int ac) { this.ac = ac; } + + public void update(VariantContext eval) { + if ( eval.isTransition() ) + nTi++; + else + nTv++; + } + + // corresponding to METRIC_COLUMNS + public String getColumn(int i) { + switch (i) { + case 0: return String.valueOf(ac); + case 1: return String.valueOf(nTi); + case 2: return String.valueOf(nTv); + case 3: return String.valueOf(nTi + nTv); + case 4: return String.valueOf(ratio(nTi, nTv)); + default: + throw new StingException("Unexpected column " + i); + } + } + } + + class MetricsByAc implements TableType { + int nchromosomes = -1; + ArrayList metrics = new ArrayList(); + Object[] rows = null; + + public MetricsByAc( int nchromosomes ) { + this.nchromosomes = nchromosomes; + rows = new Object[nchromosomes+1]; + metrics = new ArrayList(nchromosomes+1); + for ( int i = 0; i < nchromosomes + 1; i++ ) { + metrics.add(new MetricsAtAC(i)); + rows[i] = "ac" + i; + } + } + + public Object[] getRowKeys() { + return rows; + } + + public Object[] getColumnKeys() { + return METRIC_COLUMNS; + } + + public String getName() { + return "MetricsByAc"; + } + + // + public String getCell(int ac, int y) { + return metrics.get(ac).getColumn(y); + } + + public String toString() { + String returnString = ""; + return returnString; + } + + public void incrValue( VariantContext eval ) { + int an = eval.getChromosomeCount(); + if ( an == nchromosomes ) { // ignore sites with no calls + int ac = eval.getChromosomeCount(eval.getAlternateAllele(0)); + metrics.get(ac).update(eval); + } + } + } + + public SimpleMetricsByAC(VariantEvalWalker parent) { + // don't do anything + } + + public String getName() { + return "SimpleMetricsByAC"; + } + + public int getComparisonOrder() { + return 1; // we only need to see each eval track + } + + public boolean enabled() { + return true; + } + + public String toString() { + return getName(); + } + + public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + final String interesting = null; + + if (eval != null && eval.isSNP() && eval.hasGenotypes() ) { + if ( metrics == null ) + metrics = new MetricsByAc(2 * eval.getNSamples()); + metrics.incrValue(eval); + } + + return interesting; // This module doesn't capture any interesting sites, so return null + } + + //public void finalizeEvaluation() { + // + //} +} diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index e3a260fd4..a3f12b4f1 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -7,7 +7,8 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Map; -public class VariantEvalIntegrationTest extends WalkerTest { +public class + VariantEvalIntegrationTest extends WalkerTest { private static String cmdRoot = "-T VariantEval" + " -R " + oneKGLocation + "reference/human_b36_both.fasta -reportType Grep"; @@ -19,8 +20,8 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testVESimple() { HashMap expectations = new HashMap(); - expectations.put("-L 1:1-10,000,000", "b28ffcff420177d9b73aa726e260fb34"); - expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "749d53687497d7939713e9ce586642a4"); + expectations.put("-L 1:1-10,000,000", "f2ce1e0e9bbe81b482a9448cb88e2263"); + expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "b39229f55b726c2f61a17463a3c883be"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs = entry.getKey(); @@ -41,10 +42,10 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf"; - String matchingMD5 = "3abf37ada16619f4b3f39cb7ecad497f"; + String matchingMD5 = "f8c7bd3ce4499cff485d2170ce896af9"; expectations.put("", matchingMD5); expectations.put(" -known comp_hapmap -known dbsnp", matchingMD5); - expectations.put(" -known comp_hapmap", "75026dbdad38f74ac697a7c102e64db4"); + expectations.put(" -known comp_hapmap", "99c687b95b1a4614cd76570bfd24e7a6"); for ( Map.Entry entry : expectations.entrySet() ) { String extraArgs2 = entry.getKey(); @@ -62,7 +63,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30"; WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s", 2, - Arrays.asList("06cdbe7f94990dfe61b5e3ec03b49151", "b4a42c90318adc88361691ece50426f2")); + Arrays.asList("bfbad4053c89c264cec16040df0bcc4c", "b4a42c90318adc88361691ece50426f2")); executeTest("testVEWriteVCF", spec); } }