diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java index b4e3ef54b..14b7019bd 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/poolseq/PowerBelowFrequencyWalker.java @@ -50,11 +50,17 @@ public class PowerBelowFrequencyWalker extends LocusWalker { @Argument(fullName="ignoreReverseReads",doc="Ignore the reverse reads at a site. Defaults to false.", required = false) boolean ignoreReverseReads = false; + private boolean calledByAnotherWalker = false; + public void initialize() { if ( alleleFreq < 1 ) { String err = "Allele frequency (-af) must be greater than or equal to one."; throw new StingException(err); } + + if ( numIndividuals == 0 ) { + calledByAnotherWalker = true; + } } public Integer reduceInit() { @@ -105,7 +111,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker { } public double calculatePowerAtFrequency( AlignmentContext context, int alleles ) { - return theoreticalPower( context.numReads(), getMeanQ(context), alleles ); + return theoreticalPower( context.numReads(), getMeanQ(context), alleles, lodThresh ); } public byte getMeanQ( AlignmentContext context ) { @@ -139,7 +145,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker { return String.format("%s%n", header); } - public double theoreticalPower( int depth, byte q, int alleles ) { + public double theoreticalPower( int depth, byte q, int alleles, double lodThreshold ) { double power; if( depth <= 0 ) { power = 0.0; @@ -151,7 +157,7 @@ public class PowerBelowFrequencyWalker extends LocusWalker { double dkterm_num = Math.log10( snpProp * (p_error/3) + (1 - snpProp) * (1 - p_error) ); double dkterm_denom = Math.log10( 1 - p_error); - int kaccept = (int) Math.ceil( ( lodThresh - ( (double) depth ) * ( dkterm_num - dkterm_denom ) ) / + int kaccept = (int) Math.ceil( ( lodThreshold - ( (double) depth ) * ( dkterm_num - dkterm_denom ) ) / ( kterm_num - kterm_denom- dkterm_num + dkterm_denom ) ); // we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs @@ -171,5 +177,11 @@ public class PowerBelowFrequencyWalker extends LocusWalker { return ((double)alleles)/(2*numIndividuals); } - + public void setPoolSize(int poolSize) { + if ( calledByAnotherWalker ) { + numIndividuals = poolSize; + } else { + throw new StingException("This method should only be accessible by calling it from another walker."); + } + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java index ce30cd3df..23bac1601 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java @@ -22,6 +22,8 @@ public class ConcordanceTruthTable { private static final int TRUE_NEGATIVE = 1; private static final int FALSE_POSITIVE = 2; private static final int FALSE_NEGATIVE = 3; + private static final int VARIANT = 1; + private static final String[] POOL_HEADERS = {"TRUE_POSITIVE","TRUE_NEGATIVE","FALSE_POSITIVE","FALSE_NEGATIVE"}; private static final int REF = 0; private static final int VAR_HET = 1; @@ -38,6 +40,7 @@ public class ConcordanceTruthTable { private int[] truth_totals; private int[] calls_totals; + public ConcordanceTruthTable(String name) { // there's a specific sample associated with this truth table this.name = name; @@ -54,9 +57,30 @@ public class ConcordanceTruthTable { } } - public ConcordanceTruthTable() { + public ConcordanceTruthTable(int nSamples) { // there's no specific sample associated with this truth table singleSampleMode = false; + name = "pooled_concordance"; + truth_totals = new int[4]; + calls_totals = new int[4]; + for (int i = 0; i < 4; i++) { + truth_totals[i] = 0; + calls_totals[i] = 0; + } + + initializeFrequencyTable(nSamples); + } + + private void initializeFrequencyTable( int numChips ) { + // System.out.println("Frequency Table for Pooled Concordance initialized with number of chips = "+numChips); + table = new int[numChips*2][4]; + for (int i = 0; i < 4; i++) { + for ( int freq = 0; freq < 2*numChips; freq ++ ) { + table[freq][i] = 0; + } + } + + // System.out.println("Table Size: "+table.length+" by "+table[1].length); } public void addEntry(List> chipEvals, Variation eval, char ref) { @@ -77,6 +101,21 @@ public class ConcordanceTruthTable { //System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval); addGenotypeEntry(truthType, callType); } + } else { // if we cannot associate tables with individuals, then we are working in a pooled context + // first we need to expand our tables to include frequency information + + Pair poolVariant = getPooledAlleleFrequency(chipEvals, ref); + + int truthType = getGenotype(poolVariant.getFirst(),ref); // convenience method; now to interpret + + if (truthType == VAR_HET || truthType == VAR_HOM) { + truthType = VARIANT; + } + + int callType = getCallIndex(eval,ref); + + addFrequencyEntry( truthType,callType,poolVariant.getSecond() ); + } // TODO -- implement me for pooled mode with frequency stats @@ -85,6 +124,69 @@ public class ConcordanceTruthTable { // TODO -- Indexes like TRUE_POSITIVE are defined above for you } + public Pair getPooledAlleleFrequency( List> chips, char ref) { + // TODO -- this is actually just a note that I wanted to appear in blue. This method explicitly uses + // TODO --- the assumption that tri-allelic sites do not really exist, and that if they do the + // TODO --- site will be marked as such by an 'N' in the reference, so we will not get to this point. + Genotype variant = null; + int frequency = 0; + if ( chips != null ) { + for ( Pair chip : chips ) { + Genotype c = chip.getFirst(); + if ( c != null ) { + if ( c.isVariant(ref) ) { + if ( c.isHet() ) { + frequency ++; + } else { // c is hom + frequency += 2; + } + variant = c; + } + } + } + } + + return new Pair(variant, frequency); + + } + + private void addFrequencyEntry ( int truthIndex, int callIndex, int numTrueSupportingAlleles ) { + calls_totals[callIndex] ++; + truth_totals[truthIndex] ++; + + if ( truthIndex == REF && callIndex == REF ) { + // true negative + table[numTrueSupportingAlleles][TRUE_NEGATIVE] ++; + // sanity check - there should never be an entry in + // [*][TRUE_NEGATIVE] for * > 0 + } else if ( truthIndex == REF && callIndex == VARIANT ) { + // false positive + table[numTrueSupportingAlleles][FALSE_POSITIVE] ++; + } else if ( truthIndex == VARIANT && (callIndex == NO_CALL || callIndex == REF) ) { + // false negative + table[numTrueSupportingAlleles][FALSE_NEGATIVE]++; + } else if ( truthIndex == VARIANT && callIndex == VARIANT ) { + // true positive + table[numTrueSupportingAlleles][TRUE_POSITIVE]++; + } else { + // something else is going on; wonky site or something. Don't do anything to the table. + } + } + + private static int getCallIndex(Variation eval, char ref) { + int index; + + if ( eval == null ) { + index = NO_CALL; + } else if ( ! eval.isSNP() ) { + index = REF; + } else { + index = VARIANT; + } + + return index; + } + private static int getGenotype(Genotype g, char ref) { int type; @@ -116,6 +218,20 @@ public class ConcordanceTruthTable { private void addFrequencyStats(List s) { // TODO -- implement me for pooled mode with frequency stats + s.add(String.format("name %s",name)); + s.add(String.format("TRUTH_ALLELE_FREQUENCY\tERROR_OR_TRUTH_TYPE\tTOTAL\tAS_PRCT_OF_TOTAL_CALLS\tAS_PRCT_OF_CALLS_AT_FREQUENCY")); + + for ( int af = 0; af < table.length; af ++ ) { + for ( int errorIndex = 0; errorIndex < 4; errorIndex ++ ) { + StringBuffer sb = new StringBuffer(); + sb.append(String.format("%f ", ((double) af)/ table.length)); + sb.append(String.format("%s ",POOL_HEADERS[errorIndex])); + sb.append(String.format("%d ", table[af][errorIndex])); + sb.append(String.format("%s ", percentOfTotal(table,af,errorIndex))); + sb.append(String.format("%s ", marginalPercent(table[af],errorIndex))); + s.add(sb.toString()); + } + } } @@ -196,4 +312,24 @@ public class ConcordanceTruthTable { sb.append("%"); return sb.toString(); } + + private static String percentOfTotal(int [][] errorMatrix, int alleleFrequency, int errorIndex ) { + int count = 0; + for ( int i = 0; i < errorMatrix.length; i ++ ) { + for ( int j = 0; j < 4; j ++ ) { + count += errorMatrix[i][j]; + } + } + + return cellPercent(errorMatrix[alleleFrequency][errorIndex], count ); + } + + private static String marginalPercent ( int[] errors, int errorIndex ) { + int count = 0; + for ( int i = 0; i < 4; i ++ ) { + count += errors[i]; + } + + return cellPercent(errors[errorIndex], count); + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledConcordance.java index bf967a6aa..2d4cd1a97 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledConcordance.java @@ -30,7 +30,7 @@ public class PooledConcordance extends ChipConcordance implements PoolAnalysis { protected ConcordanceTruthTable[] createTruthTableMappings(List rodNames, Map sampleToArrayIndex) { // there is only one truth table - the pool-wide one ConcordanceTruthTable[] tables = new ConcordanceTruthTable[1]; - tables[0] = new ConcordanceTruthTable(); + tables[0] = new ConcordanceTruthTable(rodNames.size()); for ( String name : rodNames ) { sampleToArrayIndex.put(name, 0); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java deleted file mode 100755 index 009f22251..000000000 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java +++ /dev/null @@ -1,396 +0,0 @@ -package org.broadinstitute.sting.playground.gatk.walkers.varianteval; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.Variation; - -import java.io.*; -import java.util.*; - -/** - * Created by IntelliJ IDEA. - * User: chartl - * Date: Sep 9, 2009 - * Time: 4:45:21 PM - * To change this template use File | Settings | File Templates. - */ - -public class PooledGenotypeConcordance extends BasicVariantAnalysis implements PoolAnalysis, GenotypeAnalysis { - - private PooledConcordanceTable table; - private String[] hapmapNames; - - public PooledGenotypeConcordance( String pathToPoolFile ) { - super("Pooled_Genotype_Concordance"); - if( pathToPoolFile == null ) { - table = null; - hapmapNames = null; - } else { - generateNameTableFromFile( pathToPoolFile ); - table = new PooledConcordanceTable( hapmapNames.length ); - } - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - String s = table.updateTable(tracker, ref, eval, hapmapNames); - if ( s == null ) { - return null; - } else { - return s + " " + context.getLocation().toString(); - } - } - - public List done() { - return table.standardOutput(); - } - - // private methods for reading in names from a file - - private void generateNameTableFromFile(String file) { - BufferedReader reader; - try { - reader = new BufferedReader(new FileReader(file)); - } catch( FileNotFoundException e) { - String errMsg = "Hapmap pool file at "+file+" was not found. Please check filepath."; - throw new StingException(errMsg, e); - } - - LinkedList nameList = new LinkedList(); - - while(continueReading(reader)) { - String line = readLine(reader); - nameList.add(line); - } - - hapmapNames = nameList.toArray(new String[nameList.size()]); - } - - private boolean continueReading(BufferedReader reader) { - boolean continueReading; - try { - continueReading = reader.ready(); - } catch(IOException e) { - continueReading = false; - } - return continueReading; - } - - private String readLine(BufferedReader reader) { - String line; - try { - line = reader.readLine(); - } catch( IOException e) { - String errMsg = "BufferedReader pointing to "+reader.toString()+" was declared ready but no line could be read from it."; - throw new StingException(errMsg,e); - } - return line; - } - -} - -class PooledConcordanceTable { - private final int CALL_INDECES = 6; - private final int TRUTH_INDECES = 4; - private final int NO_CALL = 5; - private final int REF_CALL = 0; // synonym - private final int VARIANT_CALL_NONHAPMAP = 1; - private final int VARIANT_CALL_MATCH = 2; - private final int VARIANT_CALL = 2; // re-using index - private final int VARIANT_CALL_MISMATCH = 3; - private final int VARIANT_CALL_UNKNOWN = 4; - private final int NO_TRUTH_DATA = 0; - private final int TRUTH_REF = 1; - private final int TRUTH_VAR = 2; - private final int TRUTH_UNKNOWN = 3; - - private int[][] table; - private int[][][] tableByHMFrequency; - private int variantCallsAtRefN; - private int poolSize; - - public PooledConcordanceTable(int poolSize) { - this.poolSize = poolSize; - table = new int[TRUTH_INDECES][CALL_INDECES]; - tableByHMFrequency = new int[calculateNumFrequencyIndeces(poolSize)][TRUTH_INDECES][CALL_INDECES]; - variantCallsAtRefN = 0; - - for ( int j = 0; j < TRUTH_INDECES; j ++ ) { - for ( int k = 0; k < CALL_INDECES; k ++ ) { - table[j][k] = 0; - for( int i = 0; i < calculateNumFrequencyIndeces(poolSize); i ++ ) { - tableByHMFrequency[i][j][k] = 0; - } - } - } - } - - public String updateTable(RefMetaDataTracker tracker, char ref, Variation eval, String[] names) { - int truthIndex, callIndex, frequencyIndex = -1; - List chips = getChips(names, tracker); - if ( ref == 'N' || ref == 'n' ) { - variantCallsAtRefN += ( eval == null ) ? 0 : 1; - truthIndex = NO_TRUTH_DATA; // don't want calls on Ns to factor in to calculation - callIndex = NO_CALL; // don't want calls on Ns to factor in to calculation - } else if( chips.isEmpty() ) { - truthIndex = NO_TRUTH_DATA; - if ( eval == null ) { - callIndex = NO_CALL; - } else { - callIndex = ( pooledCallIsRef( eval, ref ) ) ? REF_CALL : VARIANT_CALL_NONHAPMAP; - } - } else { - frequencyIndex = calcVariantFrequencyIndex(ref, chips); - - if ( freqIndexToFrequency( frequencyIndex ) != 0 ) { - truthIndex = TRUTH_VAR; - } else if ( chips.size() == poolSize ) { - truthIndex = TRUTH_REF; - } else { - truthIndex = TRUTH_UNKNOWN; - } - - if ( eval == null ) { - callIndex = NO_CALL; - } else { - if ( pooledCallIsRef( eval, ref ) ) { - callIndex = REF_CALL; - } else if ( truthIndex == TRUTH_REF ) { - callIndex = VARIANT_CALL; - } else if (truthIndex == TRUTH_UNKNOWN) { - callIndex = VARIANT_CALL_UNKNOWN; - } else if ( mismatchingCalls( eval, chips, ref ) ) { - callIndex = VARIANT_CALL_MISMATCH; - } else { - callIndex = VARIANT_CALL_MATCH; - } - } - - tableByHMFrequency[frequencyIndex][truthIndex][callIndex] ++; // note that this updates only on HAPMAP sites. This is good. - } - - table[truthIndex][callIndex] ++; - - String interest; - - if ( truthIndex == TRUTH_VAR && ( callIndex == NO_CALL || callIndex == REF_CALL ) ) { - interest = "False_Negative_with_frequency: "+ Double.toString(freqIndexToFrequency(frequencyIndex)); - } else if ( truthIndex == TRUTH_REF && ( callIndex == VARIANT_CALL )) { - interest = "False_Positive"; - } else if ( callIndex == VARIANT_CALL_MISMATCH ) { - interest = "Mismatching_call"; - } else { - interest = null; - } - - return interest; - } - - public List getChips(String[] names, RefMetaDataTracker tracker) { - LinkedList chips = new LinkedList(); - for ( String name : names ) { - Variation chip = (Variation) tracker.lookup(name,null); - if ( chip != null ) { - chips.add(chip); - } - } - - return chips; - } - - public boolean pooledCallIsRef(Variation eval, char ref) { - // code broken out for easy alteration when we start using pool-specific variations - return Utils.join("",eval.getAlleleList()).equalsIgnoreCase((Utils.dupString(ref,2))); - } - - public int calculateNumFrequencyIndeces(int poolSize) { - // code broken out for easy alteration when we start using pool-specific variations - return 2*(poolSize+1); - } - - public int calcVariantFrequencyIndex( char ref, List evals ) { - return frequencyToFrequencyIndex( calcVariantFrequency( evals, ref ) ); - } - - public int frequencyToFrequencyIndex( double frequency ) { - // code broken out for easy alteration when we start using pool-specific variations - return (int) frequency; - } - - public double calcVariantFrequency( List evals, char ref ) { - // code broken out for easy alteration when we start using pool-specific variations - Variation firstEval = evals.get(0); - double alternateFrequency = 0.0; - for ( Variation eval : evals ) { - if ( mismatchingCalls(firstEval, eval, ref) ) { - // todo -- make this not a StingException but go to the log - throw new StingException("Tri-Allelic Position "+Utils.join("",eval.getAlleleList())+"/"+Utils.join("",firstEval.getAlleleList()) + " Ref: "+ ref + " not supported"); - } else { - alternateFrequency += calledVariantFrequency(eval,ref); - } - } - return alternateFrequency; - } - - public boolean mismatchingCalls(Variation var1, List vars, char ref) { - ListIterator varIter = vars.listIterator(); - try { - while( pooledCallIsRef( (Variation) varIter.next(), ref ) ) { - // don't do squat! - } - } catch (NoSuchElementException e) { - String errMsg = "Comparison data given to mismatchingCalls(Variation, List, char) was entirely reference. This is a bug and should never happen."; - throw new StingException(errMsg, e); - } - - return mismatchingCalls( var1, (Variation) varIter.previous(), ref); - } - - public boolean mismatchingCalls(Variation eval, Variation chip, char ref) { - // eval and chip guaranteed to be non-null - char chipF = Utils.stringToChar(chip.getAlleleList().get(0)); - char chipS = Utils.stringToChar(chip.getAlleleList().get(1)); - char evalF = Utils.stringToChar(eval.getAlleleList().get(0)); - char evalS = Utils.stringToChar(eval.getAlleleList().get(1)); - boolean mismatch; - if (chipF == ref) { - if ( chipS == ref ) { - mismatch = false; // no mismatch against hom ref - } else { - mismatch = ( evalF != chipS && evalS != chipS ); // test against het nonref - } - } else if ( chipS == ref ) { // het nonref (somehow made nonref|ref rather than ref|nonref) - mismatch = ( evalF != chipF && evalS != chipF ); // test against het nonref - } else { // chip is homozygous nonreference - if( evalF == ref ) { - mismatch = ( evalS != chipF && evalS != chipS ); // make sure the nonref call of eval matches one of the chip alleles - } else if ( evalS == ref ) { - mismatch = ( evalF != chipF && evalF != chipS ); // make sure the nonref call of eval matches one of the chip alleles - } else { - // both are hom nonref - mismatch = ( evalF != chipF || evalS != chipS); // could be multiallelic calls - } - } - return mismatch; - } - - public double calledVariantFrequency( Variation var, char ref ) { - // code broken out for easy alteration when we start using pool-specific variations - String varStr = Utils.join("",var.getAlleleList()); - double freq; - if ( varStr.charAt(0) != ref && varStr.charAt(1) != ref ) { - freq = (double) 2; - } else if ( varStr.charAt(0) == ref && varStr.charAt(1) == ref ) { - freq = (double) 0; - } else { - freq = (double) 1; - } - - return freq; - } - - public double freqIndexToFrequency( int freqIndex ) { - // code broken out for easy alteration when we start using pool-specific variations - return (double) freqIndex; - } - - public List standardOutput() { - LinkedList out = new LinkedList(); - int nSNPCallSites = 0; - int nHapmapSites = 0; - int nHapmapSNPSites = 0; - int nFullHapmapRefSites = 0; - int nUnknownHapmapSites = 0; - - for( int truthIndex = 0; truthIndex < TRUTH_INDECES; truthIndex ++ ) { - nSNPCallSites += table[truthIndex][VARIANT_CALL_MATCH] + table[truthIndex][VARIANT_CALL_MISMATCH] + table[truthIndex][VARIANT_CALL_NONHAPMAP]; - nSNPCallSites += table[truthIndex][VARIANT_CALL_UNKNOWN]; - } - - for ( int callIndex = 0; callIndex < CALL_INDECES; callIndex ++ ) { - nHapmapSites += table[TRUTH_REF][callIndex] + table[TRUTH_VAR][callIndex] + table[TRUTH_UNKNOWN][callIndex]; - nUnknownHapmapSites += table[TRUTH_UNKNOWN][callIndex]; - nHapmapSNPSites += table[TRUTH_VAR][callIndex]; - nFullHapmapRefSites += table[TRUTH_REF][callIndex]; - } - - int nRefsCalledCorrectly = table[TRUTH_REF][REF_CALL]; - int nHapmapRefsNotCalled = table[TRUTH_REF][NO_CALL]; - int nRefsCalledAsSNP = table[TRUTH_REF][VARIANT_CALL]; - int nSNPsCalledCorrectly = table[TRUTH_VAR][VARIANT_CALL_MATCH]; - int nSNPsCalledIncorrectly = table[TRUTH_VAR][VARIANT_CALL_MISMATCH]; - int nSNPsCalledAsRef = table[TRUTH_VAR][REF_CALL]; - int nHapmapSNPsNotCalled = table[TRUTH_VAR][NO_CALL]; - int nSNPsOnHapmapSNP = table[TRUTH_VAR][VARIANT_CALL_MATCH] + table[TRUTH_VAR][VARIANT_CALL_MISMATCH]; - int nSNPsAtNonHapmap = table[NO_TRUTH_DATA][VARIANT_CALL_NONHAPMAP]; - int nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals = table[TRUTH_UNKNOWN][VARIANT_CALL_UNKNOWN]; - - out.add(String.format("Total Number of SNP Calls:\t\t%d", nSNPCallSites)); - out.add(String.format("Total SNP calls on non-Hapmap sites\t\t%d", nSNPsAtNonHapmap)); - out.add(String.format("Number of Hapmap Sites:\t\t%d", nHapmapSites)); - out.add("Data on Hapmap Reference Sites"); - out.add(String.format("\tSites where all Hapmap chips were ref: \t\t%d", nFullHapmapRefSites)); - out.add(String.format("\t\tReference sites correctly called:\t\t%d\t(%d%%)", nRefsCalledCorrectly, divideToPercent(nRefsCalledCorrectly, nFullHapmapRefSites))); - out.add(String.format("\t\tReference sites called as variant:\t\t%d\t(%d%%)", nRefsCalledAsSNP, divideToPercent(nRefsCalledAsSNP, nFullHapmapRefSites))); - out.add(String.format("\t\tReference sites not confidently called SNP:\t%d\t(%d%%)", nHapmapRefsNotCalled, divideToPercent(nHapmapRefsNotCalled, nFullHapmapRefSites))); - out.add(String.format("\tSites where all seen Hapmap chips were ref, but not all Hapmap chips available: %d", nUnknownHapmapSites)); - out.add(String.format("\t\tPutative reference sites called ref:\t\t\t%d\t(%d%%)", table[TRUTH_UNKNOWN][REF_CALL], divideToPercent(table[TRUTH_UNKNOWN][REF_CALL], nUnknownHapmapSites))); - out.add(String.format("\t\tPutative reference sites called SNP:\t\t\t%d\t(%d%%)", nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals, divideToPercent(nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals, nUnknownHapmapSites))); - out.add(String.format("\t\tPutative reference sites not confidently called SNP:\t%d\t(%d%%)", table[TRUTH_UNKNOWN][NO_CALL], divideToPercent(table[TRUTH_UNKNOWN][NO_CALL], nUnknownHapmapSites))); - out.add("Data on Hapmap Variant Sites"); - out.add(String.format("\tNumber of Hapmap SNP Sites:\t\t\t%d", nHapmapSNPSites)); - out.add(String.format("\t\tSNP sites incorrectly called ref:\t%d\t(%d%%)", nSNPsCalledAsRef, divideToPercent(nSNPsCalledAsRef, nHapmapSNPSites))); - out.add(String.format("\t\tSNP sites not confidently called:\t%d\t(%d%%)", nHapmapSNPsNotCalled, divideToPercent(nHapmapSNPsNotCalled, nHapmapSNPSites))); - out.add(String.format("\tSNP calls on Hapmap SNP Sites:\t\t%d", nSNPsOnHapmapSNP)); - out.add(String.format("\t\tSNP sites correctly called SNP:\t%d\t(%d%%)", nSNPsCalledCorrectly, divideToPercent(nSNPsCalledCorrectly, nSNPsOnHapmapSNP))); - out.add(String.format("\t\tSNP sites called a different base:\t%d\t(%d%%)", nSNPsCalledIncorrectly, divideToPercent(nSNPsCalledIncorrectly, nSNPsOnHapmapSNP))); - out.add(String.format("Calls on reference N:\t\t%d", variantCallsAtRefN)); - out.add("----------------------- Output By Allele Frequency ------------------------"); - out.add(""); - out.add("FREQUENCY \tFALSE_POSITIVES\tTRUE_NEGATIVES\tFALSE_NEGATIVES\tTRUE_POSITIVES\tMISCALLS\tNO_CALLS\tFALSE_NEGATIVE_RATE\tFALSE_POSITIVE_RATE"); - for( int i = 0; i < getLargestOutputAlleleFrequencyIndex(); i ++) { - double freq = freqIndexToFrequency(i); - int nRefsCalledAsSNPFreq = tableByHMFrequency[i][TRUTH_REF][VARIANT_CALL]; - int nRefsCalledCorrectFreq = tableByHMFrequency[i][TRUTH_REF][REF_CALL]; - int nSNPsCalledIncorrectlyFreq = tableByHMFrequency[i][TRUTH_VAR][VARIANT_CALL_MISMATCH]; - int nSNPsCalledCorrectlyFreq = tableByHMFrequency[i][TRUTH_VAR][VARIANT_CALL_MATCH]; - int nSNPsCalledAsRefFreq = tableByHMFrequency[i][TRUTH_VAR][REF_CALL]; - int nNoCallsAtFreq = tableByHMFrequency[i][TRUTH_VAR][NO_CALL] + tableByHMFrequency[i][TRUTH_REF][NO_CALL]; - int nSnpsCalledAtFreq = nSNPsCalledIncorrectlyFreq + nSNPsCalledCorrectlyFreq + nSNPsCalledAsRefFreq; - int nSNPsAtFreq = 0; - for( int j = 0; j < CALL_INDECES; j ++ ) { - nSNPsAtFreq += tableByHMFrequency[i][TRUTH_VAR][j]; - } - int nSNPsNoCallFreq = tableByHMFrequency[i][TRUTH_VAR][NO_CALL]; - double fnrate = ((double) (nSNPsNoCallFreq + nSNPsCalledAsRefFreq) / (nSNPsAtFreq)); - double fprate = ((double) nRefsCalledAsSNPFreq / (nRefsCalledAsSNPFreq + nRefsCalledCorrectFreq + tableByHMFrequency[i][TRUTH_REF][NO_CALL])); - out.add(String.format("%f\t%d\t\t%d\t\t%d\t\t\t%d\t%d\t\t%d\t\t%f\t\t\t%f", - freq, nRefsCalledAsSNPFreq, nRefsCalledCorrectFreq, nSNPsCalledAsRefFreq, nSNPsCalledCorrectlyFreq, nSNPsCalledIncorrectlyFreq, nNoCallsAtFreq, fnrate, fprate)); - } - - return out; - } - - public int getLargestOutputAlleleFrequencyIndex() { - // fixed - returns the last index at which genotype sites appear - - int freqIndex = -1; - for ( int j = 0; j < calculateNumFrequencyIndeces(poolSize); j ++ ) { - int nHapmapSitesAtFreq = 0; - for ( int i = 0; i < CALL_INDECES; i ++) { - nHapmapSitesAtFreq += table[TRUTH_REF][i] + table[TRUTH_VAR][i] + table[TRUTH_UNKNOWN][i]; - } - - if ( nHapmapSitesAtFreq > 0 ) { - freqIndex = j; - } - } - - return freqIndex; - } - - public int divideToPercent( int numerator, int denominator ) { - return (int) Math.floor(100.0*((double) numerator)/ denominator ); - } -} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index f5aa7e2ec..ae6de1fc0 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -130,15 +130,16 @@ public class VariantEvalWalker extends RefWalker { // analyses.add(new ValidationDataAnalysis()); - // todo -- chris remove? - //analyses.add(new PooledGenotypeConcordance(samplesFile)); analyses.add(new VariantCounter()); analyses.add(new VariantDBCoverage(knownSNPDBName)); //analyses.add(new PooledFrequencyAnalysis(numPeopleInPool,knownSNPDBName)); if ( samplesFile == null ) analyses.add(new GenotypeConcordance(genotypeChipName, false)); - else - analyses.add(new GenotypeConcordance(samplesFile, true)); + else if ( numPeopleInPool < 1) + analyses.add(new GenotypeConcordance(samplesFile, true)); + else { + analyses.add(new PooledConcordance(samplesFile,true)); + } analyses.add(new TransitionTranversionAnalysis()); analyses.add(new NeighborDistanceAnalysis()); analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));