diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 081192e20..ddd9b1f79 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -10,10 +10,7 @@ import org.broadinstitute.sting.utils.genotype.vcf.*; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.util.Iterator; -import java.util.List; -import java.util.NoSuchElementException; -import java.util.Map; +import java.util.*; /** @@ -256,6 +253,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, /** * get the genotype * + * // todo -- WTF is this? This is a deeply unsafe call + * * @return a map in lexigraphical order of the genotypes */ public Genotype getCalledGenotype() { @@ -301,6 +300,18 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, return mCurrentRecord.getSampleNames(); } +// public Map getSampleGenotypes() { +// String[] samples = getSampleNames(); +// List genotypes = getGenotypes(); +// HashMap map = new HashMap(); +// +// for ( int i = 0; i < samples.length; i++ ) { +// map.put(samples[i], genotypes.get(i)); +// } +// +// return map; +// } + public Map getInfoValues() { assertNotNull(); return mCurrentRecord.getInfoValues(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java index ea9ba4f2a..cd75f2659 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java @@ -4,6 +4,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.Utils; import java.util.*; @@ -22,7 +23,7 @@ public class ConcordanceTruthTable { private static final int FALSE_POSITIVE = 2; private static final int FALSE_NEGATIVE = 3; private static final int VARIANT = 1; - private static final String[] POOL_HEADERS = {"TRUE_POSITIVE","TRUE_NEGATIVE","FALSE_POSITIVE","FALSE_NEGATIVE"}; + private static final String[] POOL_HEADERS = {"TP","TN","FP","FN"}; private static final int REF = 0; private static final int VAR_HET = 1; @@ -103,24 +104,19 @@ public class ConcordanceTruthTable { } } else { // if we cannot associate tables with individuals, then we are working in a pooled context // first we need to expand our tables to include frequency information + Pair > poolVariant = getPooledAlleleFrequency(chipEvals, ref); - Pair > poolVariant = getPooledAlleleFrequency(chipEvals, ref); - - int truthType = getGenotype(poolVariant.getFirst(),ref); // convenience method; now to interpret - - if (truthType == VAR_HET || truthType == VAR_HOM) { - truthType = VARIANT; - } - + int truthType = poolVariant.getFirst(); // convenience method; now to interpret int callType = getCallIndex(eval,ref); int numTrueSupportingAlleles = poolVariant.getSecond().getFirst(); if ( numTrueSupportingAlleles > 0 && truthType == VARIANT && callType != VARIANT ) { - violation = String.format("False negative: %s with %d alt alleles", - chipEvals.get(0).getFirst(), numTrueSupportingAlleles); + violation = String.format("False negative: %s with %d alt alleles", chipEvals.get(0).getFirst(), numTrueSupportingAlleles); + } else if ( truthType == REF && callType == VARIANT ) { + violation = String.format("False positive: %s at hom-ref site", eval); } - addFrequencyEntry( truthType,callType, poolVariant.getSecond().getFirst() ); + addFrequencyEntry( truthType, callType, poolVariant.getSecond().getFirst() ); } @@ -131,46 +127,48 @@ public class ConcordanceTruthTable { return violation; } - public Pair< Genotype , Pair > getPooledAlleleFrequency( List> chips, char ref) { - // TODO -- this is actually just a note that I wanted to appear in blue. This method explicitly uses - // TODO --- the assumption that tri-allelic sites do not really exist, and that if they do the - // TODO --- site will be marked as such by an 'N' in the reference, so we will not get to this point. - Genotype variant = null; + public Pair> getPooledAlleleFrequency( List> chips, char ref) { + // this is actually just a note that I wanted to appear in blue. This method explicitly uses + // the assumption that tri-allelic sites do not really exist, and that if they do the + // site will be marked as such by an 'N' in the reference, so we will not get to this point. + int frequency = 0; int nChips = 0; if ( chips != null ) { for ( Pair chip : chips ) { Genotype c = chip.getFirst(); if ( c != null ) { - nChips ++; + nChips++; if ( c.isVariant(ref) ) { if ( c.isHet() ) { - frequency ++; + frequency++; } else { // c is hom frequency += 2; } - variant = c; } + //System.out.printf(" Genotype %s at %c => %d%n", c, ref, frequency); } } + //System.out.printf("*** %d%n", frequency); } - return new Pair< Genotype, Pair >(variant, new Pair(frequency,nChips)); - + int truthType = nChips > 0 ? ( frequency > 0 ? VARIANT : REF ) : NO_CALL; + return new Pair >(truthType, new Pair(frequency,nChips)); } - private void addFrequencyEntry ( int truthIndex, int callIndex, int numTrueSupportingAlleles ) { - calls_totals[callIndex] ++; - truth_totals[truthIndex] ++; + private void addFrequencyEntry( int truthIndex, int callIndex, int numTrueSupportingAlleles ) { + //System.out.printf(" %s %s %d%n", CALL_NAMES[truthIndex], CALL_NAMES[callIndex], numTrueSupportingAlleles); + calls_totals[callIndex]++; + truth_totals[truthIndex]++; - if ( truthIndex == REF && callIndex == REF ) { + if ( truthIndex == REF && ( callIndex == REF || callIndex == NO_CALL ) ) { // true negative - table[numTrueSupportingAlleles][TRUE_NEGATIVE] ++; + table[numTrueSupportingAlleles][TRUE_NEGATIVE]++; // sanity check - there should never be an entry in // [*][TRUE_NEGATIVE] for * > 0 } else if ( truthIndex == REF && callIndex == VARIANT ) { // false positive - table[numTrueSupportingAlleles][FALSE_POSITIVE] ++; + table[numTrueSupportingAlleles][FALSE_POSITIVE]++; } else if ( truthIndex == VARIANT && (callIndex == NO_CALL || callIndex == REF) ) { // false negative table[numTrueSupportingAlleles][FALSE_NEGATIVE]++; @@ -224,22 +222,39 @@ public class ConcordanceTruthTable { addFrequencyStats(s); } - private void addFrequencyStats(List s) { +// private void addFrequencyStats(List s) { +// +// // TODO -- implement me for pooled mode with frequency stats +// s.add(String.format("name %s",name)); +// s.add(String.format("TRUTH_ALLELE_FREQUENCY\tERROR_OR_TRUTH_TYPE\tTOTAL\tAS_PRCT_OF_TOTAL_CALLS\tAS_PRCT_OF_CALLS_AT_FREQUENCY")); +// +// for ( int af = 0; af < table.length; af ++ ) { +// for ( int errorIndex = 0; errorIndex < 4; errorIndex ++ ) { +// StringBuffer sb = new StringBuffer(); +// sb.append(String.format("%f ", ((double) af)/ table.length)); +// sb.append(String.format("%s ",POOL_HEADERS[errorIndex])); +// sb.append(String.format("%d ", table[af][errorIndex])); +// sb.append(String.format("%s ", percentOfTotal(table,af,errorIndex))); +// sb.append(String.format("%s ", marginalPercent(table[af],errorIndex))); +// s.add(sb.toString()); +// } +// } +// +// } - // TODO -- implement me for pooled mode with frequency stats + private void addFrequencyStats(List s) { s.add(String.format("name %s",name)); - s.add(String.format("TRUTH_ALLELE_FREQUENCY\tERROR_OR_TRUTH_TYPE\tTOTAL\tAS_PRCT_OF_TOTAL_CALLS\tAS_PRCT_OF_CALLS_AT_FREQUENCY")); + s.add("TRUTH_ALLELE_COUNT\tTRUTH_ALLELE_FREQ\tTOTAL\t" + Utils.join(" ", POOL_HEADERS)); for ( int af = 0; af < table.length; af ++ ) { + int sum = 0; + String counts = ""; for ( int errorIndex = 0; errorIndex < 4; errorIndex ++ ) { - StringBuffer sb = new StringBuffer(); - sb.append(String.format("%f ", ((double) af)/ table.length)); - sb.append(String.format("%s ",POOL_HEADERS[errorIndex])); - sb.append(String.format("%d ", table[af][errorIndex])); - sb.append(String.format("%s ", percentOfTotal(table,af,errorIndex))); - sb.append(String.format("%s ", marginalPercent(table[af],errorIndex))); - s.add(sb.toString()); + int count = table[af][errorIndex]; + sum += count; + counts += String.format(" %6d", count); } + s.add(String.format("%6d %.3f %6d%s", af, ((double)af)/ table.length, sum, counts)); } } @@ -321,24 +336,4 @@ public class ConcordanceTruthTable { sb.append("%"); return sb.toString(); } - - private static String percentOfTotal(int [][] errorMatrix, int alleleFrequency, int errorIndex ) { - int count = 0; - for ( int i = 0; i < errorMatrix.length; i ++ ) { - for ( int j = 0; j < 4; j ++ ) { - count += errorMatrix[i][j]; - } - } - - return cellPercent(errorMatrix[alleleFrequency][errorIndex], count ); - } - - private static String marginalPercent ( int[] errors, int errorIndex ) { - int count = 0; - for ( int i = 0; i < 4; i ++ ) { - count += errors[i]; - } - - return cellPercent(errors[errorIndex], count); - } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java index 71084672b..a9ce647f1 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PoolAnalysis.java @@ -7,6 +7,6 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; * Time: 4:39:12 PM * To change this template use File | Settings | File Templates. */ -public interface PoolAnalysis { +public interface PoolAnalysis extends PopulationAnalysis, GenotypeAnalysis { } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 460048636..62ff415e3 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -48,8 +48,8 @@ public class VariantEvalWalker extends RefWalker { @Argument(fullName="supressDateInformation", doc="This flag indicates that we want to suppress the date information from the output, so that if can be diff'ed against previous evals.", required=false) public boolean supressDateInformation = false; - @Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false) - public int numPeopleInPool = -1; + //@Argument(fullName = "numPeopleInPool", shortName="PS", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false) + //public int numPeopleInPool = -1; @Argument(fullName = "samplesFile", shortName="samples", doc="When running an analysis on one or more individuals with truth data, this field provides a filepath to the listing of which samples are used (and are used to name corresponding rods with -B)", required=false) public String samplesFile = null; @@ -130,14 +130,16 @@ public class VariantEvalWalker extends RefWalker { analyses.add(new VariantCounter()); analyses.add(new VariantDBCoverage(knownSNPDBName)); + if ( samplesFile != null ) { - if ( numPeopleInPool < 1 ) - analyses.add(new GenotypeConcordance(samplesFile, true)); - else - analyses.add(new PooledConcordance(samplesFile, true)); + //if ( numPeopleInPool < 1 ) + // analyses.add(new GenotypeConcordance(samplesFile, true)); + //else + analyses.add(new PooledConcordance(samplesFile, true)); } else { analyses.add(new GenotypeConcordance(genotypeChipName, false)); } + analyses.add(new TransitionTranversionAnalysis()); analyses.add(new NeighborDistanceAnalysis()); analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold)); @@ -160,7 +162,8 @@ public class VariantEvalWalker extends RefWalker { } else if ( ! (analysis instanceof PopulationAnalysis || analysis instanceof PoolAnalysis) ) { removed = analysis; iter.remove(); - } else if ( numPeopleInPool > 1 && ! ( analysis instanceof PoolAnalysis ) ) { +// } else if ( numPeopleInPool > 1 && ! ( analysis instanceof PoolAnalysis ) ) { + } else if ( analysis instanceof PoolAnalysis && samplesFile == null ) { removed = analysis; iter.remove(); }