diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java index c23d1de68..6ce10b316 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java @@ -8,11 +8,8 @@ import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.util.LinkedList; -import java.util.StringTokenizer; +import java.io.*; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -21,171 +18,43 @@ import java.util.StringTokenizer; * Time: 4:45:21 PM * To change this template use File | Settings | File Templates. */ - -//todo -- onTraversalDone - + public class PooledGenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis { - private String[] individuals = null; - private static final boolean DEBUG = true; + private PooledConcordanceTable table; + private String[] hapmapNames; - private static final int REF = 0; - private static final int VAR_MATCH = 1; - private static final int VAR_HET = 1; - private static final int VAR_MISMATCH = 2; - private static final int VAR_HOM = 2; - private static final int UNKNOWN = 3; - private static final int NO_CALL = 3; // synonym - private static final String[] TRUTH_NAMES = {"IS_REF", "IS_SINGLE_VARIANT", "IS_MULTIPLE_VARIANT", "UNKNOWN"}; - private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_SINGLE_VARIANT", "CALLED_MULTIPLE_VARIANT", "NO_CALL"}; - - private int[][][] tableByIndividual; - private int[][] truthTotalsByIndividual; - private int[][] callTotalsByIndividual; - - public PooledGenotypeConcordance(String pathToHapmapPoolFile) { - super("genotype_concordance"); - if(pathToHapmapPoolFile == null) { - // do nothing + public PooledGenotypeConcordance( String pathToPoolFile ) { + super("Pooled Genotype Concordance"); + if( pathToPoolFile == null ) { + table = null; + hapmapNames = null; } else { - BufferedReader fileReader = null; - try { - fileReader = new BufferedReader(new FileReader(pathToHapmapPoolFile)); - } catch (IOException e) { - String errorMsg = "Could not open any file at " + pathToHapmapPoolFile + " Please check to see if the path is accurate."; - throw new StingException(errorMsg, e); - } - - generateNameTableFromFile(fileReader); - - if(DEBUG) { - printAllToCheck(individuals); - } - - truthTotalsByIndividual = new int[individuals.length][4]; - callTotalsByIndividual = new int[individuals.length][4]; - tableByIndividual = new int[individuals.length][4][4]; - for(int individual = 0; individual < individuals.length; individual ++) { - for(int call1 = 0; call1 < 4; call1++) { - for(int call2 = 0; call2 < 4; call2++) - { - tableByIndividual[individual][call1][call2] = 0; - } - callTotalsByIndividual[individual][call1] = 0; - truthTotalsByIndividual[individual][call1] = 0; - } - } + generateNameTableFromFile( pathToPoolFile ); + table = new PooledConcordanceTable( hapmapNames.length ); } } - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context){ - if(DEBUG && context != null) { - System.out.println( "Update Called at location: " + context.getLocation().toString() ); - } else if (DEBUG) { - System.out.println("Update Called, but no alignment context was given."); - } - for( int nameOffset = 0; nameOffset < individuals.length; nameOffset++ ) { - inc(eval, tracker, ref, context, nameOffset); - } + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + table.updateTable(tracker, ref, eval, hapmapNames); return null; } - - public void inc(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context, int nameOffset) { - Variation chip = (Variation) tracker.lookup(individuals[nameOffset],null); - if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype)))) { - String errMsg = "Trying to compare non-pooled data or non-genotype data."; - throw new StingException(errMsg); - } - - DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref,2)); - - if(DEBUG) { - System.out.print(incDebugString(nameOffset, chip, eval, ref)); - } - - // todo -- is this how want to do pooled concordance checking? - int truthIndex, callIndex; - if( chip == null ) { - truthIndex = UNKNOWN; - } else if ( chip.getAlternateBases().equals(g.toString()) ) { - truthIndex = REF; - } else if ( chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1)) { - truthIndex = VAR_HET; - } else { - truthIndex = VAR_HOM; - } - - if( eval == null ) { - callIndex = UNKNOWN; - } else if ( eval.getAlternateBases().equals(g.toString()) ) { - callIndex = REF; - } else if ( callWrongBase(eval,chip,ref) ) { - callIndex = VAR_MISMATCH; - } else { - callIndex = VAR_MATCH; - } - - if( chip != null || eval != null ) { - tableByIndividual[nameOffset][truthIndex][callIndex]++; - truthTotalsByIndividual[nameOffset][truthIndex]++; - if ( callIndex != NO_CALL ) { - callTotalsByIndividual[nameOffset][callIndex]++; - } - } - - if(DEBUG) { - System.out.printf("TruthIndex: %d CallIndex: %d%n", truthIndex, callIndex); - } - - - + public List done() { + return table.standardOutput(); } - public boolean isCorrectVariantType(Variation eval) { - // todo -- this. Check if eval is a TypeOf some ROD class that's the right pooled call output that we - // todo -- want to deal with. + // private methods for reading in names from a file - return true; - } - - public boolean callWrongBase(Variation eval, Variation chip, char ref) { - boolean wrongCall; - if ( chip == null ) { - wrongCall = true; - } else if ( chip.getAlternateBases().charAt(0) == chip.getAlternateBases().charAt(1) ) { // homozygous nonref - if( eval.getAlternateBases().charAt(0) == chip.getAlternateBases().charAt(0) || - eval.getAlternateBases().charAt(1) == chip.getAlternateBases().charAt(0) ) { - wrongCall = false; - } else { - wrongCall = true; - } - } else if ( chip.getAlternateBases().charAt(0) == ref || chip.getAlternateBases().charAt(1) == ref ) { // het nonref - wrongCall = ( getNonrefBase(eval,ref) != getNonrefBase(chip,ref) ); - } else { // todo -- what do we really want to do if ref is C, but chip is AG ?? - wrongCall = true; - } - return wrongCall; - } - - public char getNonrefBase(Variation var, char ref) { - char nonRefBase; - if( var.getAlternateBases().charAt(0) == ref ) { - nonRefBase = var.getAlternateBases().charAt(1); - } else { - nonRefBase = var.getAlternateBases().charAt(0); + private void generateNameTableFromFile(String file) { + BufferedReader reader; + try { + reader = new BufferedReader(new FileReader(file)); + } catch( FileNotFoundException e) { + String errMsg = "Hapmap pool file at "+file+" was not found. Please check filepath."; + throw new StingException(errMsg, e); } - return nonRefBase; - } - - - - // - // private methods for reading names into individualsByPool from an external file - // - - private void generateNameTableFromFile(BufferedReader reader) { LinkedList nameList = new LinkedList(); while(continueReading(reader)) { @@ -193,11 +62,11 @@ public class PooledGenotypeConcordance extends BasicVariantAnalysis implements G nameList.add(line); } - individuals = nameList.toArray(new String[nameList.size()]); + hapmapNames = nameList.toArray(new String[nameList.size()]); } private boolean continueReading(BufferedReader reader) { - boolean continueReading = false; + boolean continueReading; try { continueReading = reader.ready(); } catch(IOException e) { @@ -217,32 +86,246 @@ public class PooledGenotypeConcordance extends BasicVariantAnalysis implements G return line; } - // code strictly for debug - - private void printAllToCheck(String[] blah) { - System.out.println("Checking:"); - for( String s : blah) { - System.out.println(s); - } - } - - private String incDebugString(int nameOffset, Variation chip, Variation eval, char ref) { - String truth; - if(chip == null) { - truth = "NoChip"; - } else { - truth = chip.getAlternateBases(); - } - - String call; - if(eval == null) { - call = "NoCall"; - } else { - call = eval.getAlternateBases(); - } - return String.format("Person: %s Ref: %s Truth: %s Call: %s%n", - individuals[nameOffset], Character.toString(ref), truth, - call); - } - +} + +class PooledConcordanceTable { + private final int CALL_INDECES = 5; + private final int TRUTH_INDECES = 4; + private final int NO_CALL = 0; + private final int REF_CALL = 0; // synonym + private final int VARIANT_CALL_NONHAPMAP = 1; + private final int VARIANT_CALL_MATCH = 2; + private final int VARIANT_CALL = 2; // re-using index + private final int VARIANT_CALL_MISMATCH = 3; + private final int VARIANT_CALL_UNKNOWN = 4; + private final int NO_TRUTH_DATA = 0; + private final int TRUTH_REF = 1; + private final int TRUTH_VAR = 2; + private final int TRUTH_UNKNOWN = 3; + + private int[][] table; + private int[][][] tableByHMFrequency; + private int variantCallsAtRefN; + private int poolSize; + + public PooledConcordanceTable(int poolSize) { + this.poolSize = poolSize; + table = new int[TRUTH_INDECES][CALL_INDECES]; + tableByHMFrequency = new int[calculateNumFrequencyIndeces(poolSize)][TRUTH_INDECES][CALL_INDECES]; + variantCallsAtRefN = 0; + + for ( int j = 0; j < TRUTH_INDECES; j ++ ) { + for ( int k = 0; k < CALL_INDECES; k ++ ) { + table[j][k] = 0; + for( int i = 0; i < calculateNumFrequencyIndeces(poolSize); i ++ ) { + tableByHMFrequency[i][j][k] = 0; + } + } + } + } + + public void updateTable(RefMetaDataTracker tracker, char ref, Variation eval, String[] names) { + int truthIndex, callIndex, frequencyIndex; + List chips = getChips(names, tracker); + if ( ref == 'N' || ref == 'n' ) { + variantCallsAtRefN += ( eval == null ) ? 0 : 1; + truthIndex = NO_TRUTH_DATA; // don't want calls on Ns to factor in to calculation + callIndex = NO_CALL; // don't want calls on Ns to factor in to calculation + } else if( chips.isEmpty() ) { + truthIndex = NO_TRUTH_DATA; + if ( eval == null ) { + callIndex = NO_CALL; + } else { + callIndex = ( pooledCallIsRef( eval, ref ) ) ? REF_CALL : VARIANT_CALL_NONHAPMAP; + } + } else { + frequencyIndex = calcVariantFrequencyIndex(ref, chips); + + if ( freqIndexToFrequency( frequencyIndex ) != 0 ) { + truthIndex = TRUTH_VAR; + } else if ( chips.size() == poolSize ) { + truthIndex = TRUTH_REF; + } else { + truthIndex = TRUTH_UNKNOWN; + } + + if ( eval == null ) { + callIndex = NO_CALL; + } else { + if ( pooledCallIsRef( eval, ref ) ) { + callIndex = REF_CALL; + } else if (truthIndex == TRUTH_REF ) { + callIndex = VARIANT_CALL; + } else if (truthIndex == TRUTH_UNKNOWN) { + callIndex = VARIANT_CALL_UNKNOWN; + } else if ( mismatchingCalls( eval, chips, ref ) ) { + callIndex = VARIANT_CALL_MISMATCH; + } else { + callIndex = VARIANT_CALL_MATCH; + } + } + + tableByHMFrequency[frequencyIndex][truthIndex][callIndex] ++; // note that this updates only on HAPMAP sites. This is good. + } + + table[truthIndex][callIndex] ++; + } + + public List getChips(String[] names, RefMetaDataTracker tracker) { + LinkedList chips = new LinkedList(); + for ( String name : names ) { + Variation chip = (Variation) tracker.lookup(name,null); + if ( chip != null ) { + chips.add(chip); + } + } + + return chips; + } + + public boolean pooledCallIsRef(Variation eval, char ref) { + // code broken out for easy alteration when we start using pool-specific variations + return eval.getAlternateBases().equalsIgnoreCase((Utils.dupString(ref,2))); + } + + public int calculateNumFrequencyIndeces(int poolSize) { + // code broken out for easy alteration when we start using pool-specific variations + return 2*(poolSize+1); + } + + public int calcVariantFrequencyIndex( char ref, List evals ) { + return frequencyToFrequencyIndex( calcVariantFrequency( evals, ref ) ); + } + + public int frequencyToFrequencyIndex( double frequency ) { + // code broken out for easy alteration when we start using pool-specific variations + return (int) frequency; + } + + public double calcVariantFrequency( List evals, char ref ) { + // code broken out for easy alteration when we start using pool-specific variations + Variation firstEval = evals.get(0); + double alternateFrequency = 0.0; + for ( Variation eval : evals ) { + if ( mismatchingCalls(firstEval, eval, ref) ) { + // todo -- make this not a StingException but go to the log + throw new StingException("Tri-Allelic Position "+eval.getAlternateBases()+"/"+firstEval.getAlternateBases() + " Ref: "+ ref + " not supported"); + } else { + alternateFrequency += calledVariantFrequency(eval,ref); + } + } + return alternateFrequency; + } + + public boolean mismatchingCalls(Variation var1, List vars, char ref) { + ListIterator varIter = vars.listIterator(); + try { + while( pooledCallIsRef( (Variation) varIter.next(), ref ) ) { + // don't do squat! + } + } catch (NoSuchElementException e) { + String errMsg = "Comparison data given to mismatchingCalls(Variation, List, char) was entirely reference. This is a bug and should never happen."; + throw new StingException(errMsg, e); + } + + return mismatchingCalls( var1, (Variation) varIter.previous(), ref); + } + + public boolean mismatchingCalls(Variation eval, Variation chip, char ref) { + // eval and chip guaranteed to be non-null + char chipF = chip.getAlternateBases().charAt(0); + char chipS = chip.getAlternateBases().charAt(1); + char evalF = chip.getAlternateBases().charAt(0); + char evalS = chip.getAlternateBases().charAt(1); + boolean mismatch; + if (chipF == ref) { + if ( chipS == ref ) { + mismatch = false; // no mismatch against hom ref + } else { + mismatch = ( evalF != chipS && evalS != chipS ); // test against het nonref + } + } else if ( chipS == ref ) { + mismatch = ( evalF != chipF && evalS != chipF ); // test against het nonref + } else { + if( evalF == ref ) { + mismatch = ( evalS != chipF && evalS != chipF ); // test against hom nonref + } else if ( evalS == ref ) { + mismatch = ( evalF != chipF && evalF != chipF ); // test against hom nonref + } else { + // both are hom nonref + mismatch = ( evalF != chipF ); + } + } + return mismatch; + } + + public double calledVariantFrequency( Variation var, char ref ) { + // code broken out for easy alteration when we start using pool-specific variations + String varStr = var.getAlternateBases(); + double freq; + if ( varStr.charAt(0) != ref && varStr.charAt(1) != ref ) { + freq = (double) 2; + } else if ( varStr.charAt(0) == ref && varStr.charAt(1) == ref ) { + freq = (double) 0; + } else { + freq = (double) 1; + } + + return freq; + } + + public double freqIndexToFrequency( int freqIndex ) { + // code broken out for easy alteration when we start using pool-specific variations + return (double) freqIndex; + } + + public List standardOutput() { + LinkedList out = new LinkedList(); + int nSNPCallSites = 0; + int nHapmapSites = 0; + int nHapmapSNPSites = 0; + int nFullHapmapRefSites = 0; + int nUnknownHapmapSites = 0; + + for( int truthIndex = 0; truthIndex < TRUTH_INDECES; truthIndex ++ ) { + nSNPCallSites += table[truthIndex][VARIANT_CALL_MATCH] + table[truthIndex][VARIANT_CALL_MISMATCH] + table[truthIndex][VARIANT_CALL_NONHAPMAP]; + nSNPCallSites += table[truthIndex][VARIANT_CALL_UNKNOWN]; + } + + for ( int callIndex = 0; callIndex < CALL_INDECES; callIndex ++ ) { + nHapmapSites += table[TRUTH_REF][callIndex] + table[TRUTH_VAR][callIndex] + table[TRUTH_UNKNOWN][callIndex]; + nUnknownHapmapSites += table[TRUTH_UNKNOWN][callIndex]; + nHapmapSNPSites += table[TRUTH_VAR][callIndex]; + nFullHapmapRefSites += table[TRUTH_REF][callIndex]; + } + + int nRefsCalledCorrectly = table[TRUTH_REF][NO_CALL]; + int nRefsCalledAsSNP = table[TRUTH_REF][VARIANT_CALL]; + int nSNPsCalledCorrectly = table[TRUTH_VAR][VARIANT_CALL_MATCH]; + int nSNPsCalledIncorrectly = table[TRUTH_VAR][VARIANT_CALL_MISMATCH]; + int nSNPsCalledAsRef = table[TRUTH_VAR][REF_CALL]; + int nSNPsOnHapmapSNP = table[TRUTH_VAR][VARIANT_CALL_MATCH] + table[TRUTH_VAR][VARIANT_CALL_MISMATCH]; + int nSNPsAtNonHapmap = table[NO_TRUTH_DATA][VARIANT_CALL_NONHAPMAP]; + int nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals = table[TRUTH_UNKNOWN][VARIANT_CALL_UNKNOWN]; + + out.add(String.format("Total Number of SNP Calls: %d", nSNPCallSites)); + out.add(String.format("Total SNP calls on non-Hapmap sites %d", nSNPsAtNonHapmap)); + out.add(String.format("Number of Hapmap Sites: %d", nHapmapSites)); + out.add("Data on Hapmap Reference Sites"); + out.add(String.format("\tSites where all Hapmap chips were ref: %d", nFullHapmapRefSites)); + out.add(String.format("\t\tReference sites correctly called: %d (%f)", nRefsCalledCorrectly, ((double)nRefsCalledCorrectly/nFullHapmapRefSites))); + out.add(String.format("\t\tReference sites called as variant: %d (%f)", nRefsCalledAsSNP, ((double)nRefsCalledAsSNP/nFullHapmapRefSites))); + out.add(String.format("\tSites where all seen Hapmap chips were ref, but not all Hapmap chips available: %d", nUnknownHapmapSites)); + out.add(String.format("\t\tPutative reference sites called ref: %d (%f)", table[TRUTH_UNKNOWN][REF_CALL], ((double)table[TRUTH_UNKNOWN][REF_CALL]/nUnknownHapmapSites))); + out.add(String.format("\t\tPutative reference sites called SNP: %d (%f)", nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals, ((double)nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals/nUnknownHapmapSites))); + out.add("Data on Hapmap Variant Sites"); + out.add(String.format("\tNumber of Hapmap SNP Sites: %d", nHapmapSNPSites)); + out.add(String.format("\t\tSNP sites incorrectly called ref: %d (%f)", nSNPsCalledAsRef, ((double)nSNPsCalledAsRef/nHapmapSNPSites))); + out.add(String.format("\tSNP calls on Hapmap SNP Sites: %d", nSNPsOnHapmapSNP)); + out.add(String.format("\t\tSNP sites correctly called SNP: %d (%f)", nSNPsCalledCorrectly, ((double)nSNPsCalledCorrectly/nSNPsOnHapmapSNP))); + out.add(String.format("\t\tSNP sites called a different base: %d (%f)", nSNPsCalledIncorrectly, ((double)nSNPsCalledIncorrectly/nSNPsOnHapmapSNP))); + out.add(String.format("Calls on reference N: %d", variantCallsAtRefN)); + + return out; + } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordanceNew.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordanceNew.java index 97311128a..6e77ce6b7 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordanceNew.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordanceNew.java @@ -12,91 +12,13 @@ import java.io.*; import java.util.LinkedList; import java.util.List; -/** - * Created by IntelliJ IDEA. - * User: Ghost - * Date: Sep 15, 2009 - * Time: 6:06:37 PM - * To change this template use File | Settings | File Templates. - */ -public class PooledGenotypeConcordanceNew extends BasicVariantAnalysis { - - private String[] hapmapNames; - private DebugWriter debug; - private HapmapConcordanceTable table; - - public PooledGenotypeConcordanceNew(String pathToPoolFile, String pathToDebugFile, int verbosity) { - super("Pooled Genotype Concordance"); - if( pathToPoolFile == null ) { - debug = new DebugWriter(); - } else { - if( pathToDebugFile != null ) { - debug = new DebugWriter( pathToDebugFile, verbosity ); - } - generateNameTableFromFile( pathToPoolFile ); - table = new HapmapConcordanceTable( hapmapNames.length ); - } - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - table.updateTable(tracker, ref, hapmapNames, eval, context.getLocation(), debug); - return null; - } - - public List done() { - return table.standardOutput(); - } - - // methods for reading pool information from file - - private void generateNameTableFromFile(String file) { - BufferedReader reader; - try { - reader = new BufferedReader(new FileReader(file)); - } catch( FileNotFoundException e) { - String errMsg = "Hapmap pool file at "+file+" was not found. Please check filepath."; - throw new StingException(errMsg, e); - } - - LinkedList nameList = new LinkedList(); - - while(continueReading(reader)) { - String line = readLine(reader); - nameList.add(line); - } - - hapmapNames = nameList.toArray(new String[nameList.size()]); - } - - private boolean continueReading(BufferedReader reader) { - boolean continueReading; - try { - continueReading = reader.ready(); - } catch(IOException e) { - continueReading = false; - } - return continueReading; - } - - private String readLine(BufferedReader reader) { - String line; - try { - line = reader.readLine(); - } catch( IOException e) { - String errMsg = "BufferedReader pointing to "+reader.toString()+" was declared ready but no line could be read from it."; - throw new StingException(errMsg,e); - } - return line; - } -} - -class HapmapConcordanceTable { +class HapmapConcordanceTableOld { private int[][][] truthTableByName; private int[][] poolCallConcordance; private int[][] callsByName; private int[][] truthByName; - private int absoluteFalseNegatives; + private int absoluteFalsePositives; private int sitesWithFullDataAndNoSNPs; private final int REF = 0; @@ -107,14 +29,15 @@ class HapmapConcordanceTable { private final int MATCH = 1; private final int MISMATCH = 2; private final int NO_CALL = 3; + private final int DE_NOVO_CALL = 3; //synonym private final int ONE_SNP = 0; private final int MULTI_SNP = 1; private final int DE_NOVO = 2; private final int TRUE_INT = 1; private final int FALSE_INT = 0; - public HapmapConcordanceTable( int numberOfPeople ) { - absoluteFalseNegatives = 0; + public HapmapConcordanceTableOld( int numberOfPeople ) { + absoluteFalsePositives = 0; sitesWithFullDataAndNoSNPs = 0; truthTableByName = new int[numberOfPeople][4][4]; callsByName = new int[numberOfPeople][4]; @@ -145,12 +68,12 @@ class HapmapConcordanceTable { int hapmapCoverage = 0; for ( int name = 0; name < names.length; name ++ ) { Variation chip = (Variation) tracker.lookup(names[name], null); - debug.printLocusInfo(eval, names[name], ref, chip, loc); SQuad variantAndCall = update(eval, name, chip, Character.toUpperCase(ref), debug); nHapmapVariants += variantAndCall.getFirst(); nCorrectCalls += variantAndCall.getSecond(); hapmapCoverage += variantAndCall.getThird(); - updateFalseNegatives(hapmapCoverage,nHapmapVariants,variantAndCall.getFourth()); + updateFalsePositives(hapmapCoverage,nHapmapVariants,variantAndCall.getFourth()); + debug.printLocusInfo(eval, names[name], ref, chip, loc, variantAndCall.getFourth()); } updatePoolCallConcordance(nHapmapVariants,nCorrectCalls); debug.printVerbose(String.format("%s Variants In Pool: %d Correctly Called: %d%n", loc.toString(), nHapmapVariants, nCorrectCalls)); @@ -187,12 +110,12 @@ class HapmapConcordanceTable { poolCallConcordance[hapmapAllelesIndex][callConcordanceIndex]++; } - public void updateFalseNegatives(int coverage, int nHapmapVariants, int callConcordanceIndex) { + public void updateFalsePositives(int coverage, int nHapmapVariants, int callConcordanceIndex) { if( coverage == truthByName.length ) { - if( nHapmapVariants == 0 ) { + if( nHapmapVariants == 0 ) { if( callConcordanceIndex != NO_CALL ) { if (callConcordanceIndex != NOVARIANT ) { - absoluteFalseNegatives++; + absoluteFalsePositives++; sitesWithFullDataAndNoSNPs++; } else { sitesWithFullDataAndNoSNPs++; @@ -225,7 +148,10 @@ class HapmapConcordanceTable { if( eval == null ) { callIndex = NO_CALL; isCorrectCallAndThereIsVariant = FALSE_INT; - } else if ( eval.getAlternateBases().charAt(0) == ref && chip.getAlternateBases().charAt(1) == ref ) { + } else if ( chip == null ) { + callIndex = DE_NOVO_CALL; + isCorrectCallAndThereIsVariant = FALSE_INT; + } else if ( eval.getAlternateBases().charAt(0) == ref && eval.getAlternateBases().charAt(1) == ref ) { callIndex = NOVARIANT; isCorrectCallAndThereIsVariant = FALSE_INT; } else if ( callWrongBase(eval, chip, ref, debug) ) { @@ -249,6 +175,7 @@ class HapmapConcordanceTable { // eval and chip guaranteed to be non-null char evalRef; char evalSNP; + if ( eval.getAlternateBases().charAt(0) == ref ) { evalRef = eval.getAlternateBases().charAt(0); evalSNP = eval.getAlternateBases().charAt(1); @@ -266,14 +193,32 @@ class HapmapConcordanceTable { LinkedList outLines = new LinkedList(); int numSingleSNPSites = poolCallConcordance[ONE_SNP][MATCH] + poolCallConcordance[ONE_SNP][MISMATCH]; int numMultiSNPSites = poolCallConcordance[MULTI_SNP][MATCH] + poolCallConcordance[MULTI_SNP][MISMATCH]; - outLines.add(String.format("Sites with variants in One hapmap individual: %d%n", numSingleSNPSites)); - outLines.add(String.format("\tNumber called correctly: %d (%f)%n",poolCallConcordance[ONE_SNP][MATCH], ((double)poolCallConcordance[ONE_SNP][MATCH]/numSingleSNPSites))); - outLines.add(String.format("\tNumber called incorrectly: %d (%f)%n", poolCallConcordance[ONE_SNP][MISMATCH], ((double)poolCallConcordance[ONE_SNP][MISMATCH])/numSingleSNPSites)); - outLines.add(String.format("Sites with variants in multiple hapmap individuals: %d%n", numMultiSNPSites)); - outLines.add(String.format("\tNumber called correctly: %d (%f)%n", poolCallConcordance[MULTI_SNP][MATCH], ((double)poolCallConcordance[MULTI_SNP][MISMATCH])/numMultiSNPSites)); - outLines.add(String.format("\tNumber called incorrectly: %d (%f)%n", poolCallConcordance[MULTI_SNP][MISMATCH], ((double)poolCallConcordance[MULTI_SNP][MISMATCH])/numMultiSNPSites)); - outLines.add(String.format("Number of called sites with chip data on all individuals: %d%n", sitesWithFullDataAndNoSNPs)); - outLines.add(String.format("\tNumber incorrectly called SNPs: %d (%f)%n", absoluteFalseNegatives, ((double)absoluteFalseNegatives)/sitesWithFullDataAndNoSNPs)); + int numSNPSites = numSingleSNPSites + numMultiSNPSites; + int numCorrect = poolCallConcordance[ONE_SNP][MATCH] + poolCallConcordance[MULTI_SNP][MATCH]; + int numIncorrect = poolCallConcordance[ONE_SNP][MISMATCH] + poolCallConcordance[MULTI_SNP][MISMATCH]; + outLines.add(String.format("Number of Hapmap SNP Sites: %d", numSNPSites)); + outLines.add(String.format("\tNumber correctly called: %d (%f)", numCorrect, ((double) numCorrect)/numSNPSites)); + outLines.add(String.format("\tNumber incorrectly called: %d (%f)", numIncorrect, ((double) numIncorrect)/numSNPSites)); + outLines.add(String.format("\tSites with variants in One hapmap individual: %d", numSingleSNPSites)); + outLines.add(String.format("\t\tNumber called correctly: %d (%f)",poolCallConcordance[ONE_SNP][MATCH], ((double)poolCallConcordance[ONE_SNP][MATCH]/numSingleSNPSites))); + outLines.add(String.format("\t\tNumber called incorrectly: %d (%f)", poolCallConcordance[ONE_SNP][MISMATCH], ((double)poolCallConcordance[ONE_SNP][MISMATCH])/numSingleSNPSites)); + outLines.add(String.format("\tSites with variants in multiple hapmap individuals: %d", numMultiSNPSites)); + outLines.add(String.format("\t\tNumber called correctly: %d (%f)", poolCallConcordance[MULTI_SNP][MATCH], ((double)poolCallConcordance[MULTI_SNP][MATCH])/numMultiSNPSites)); + outLines.add(String.format("\t\tNumber called incorrectly: %d (%f)", poolCallConcordance[MULTI_SNP][MISMATCH], ((double)poolCallConcordance[MULTI_SNP][MISMATCH])/numMultiSNPSites)); + outLines.add(String.format("\tNumber of called sites with homozygous reference chip data on all individuals: %d", sitesWithFullDataAndNoSNPs)); + outLines.add(String.format("\t\tNumber incorrectly called SNPs: %d (%f)", absoluteFalsePositives, ((double)absoluteFalsePositives)/sitesWithFullDataAndNoSNPs)); + return outLines; + } + + public List verboseOutput(String[] names) { + LinkedList outLines = (LinkedList) this.standardOutput(); + for( int nameOffset = 0; nameOffset < names.length; nameOffset ++ ) { + outLines.add(String.format("Concordance for Hapmap individual %s:", names[nameOffset])); + int numSNPSites = truthByName[nameOffset][HET] + truthByName[nameOffset][HOM]; + int numCallSites = callsByName[nameOffset][MATCH] + callsByName[nameOffset][MISMATCH]; + outLines.add(String.format("hi %s", "hello")); + } + return outLines; } @@ -347,11 +292,11 @@ class DebugWriter { return verbosity; } - public void printLocusInfo(Variation eval, String name, char ref, Variation chip, GenomeLoc loc) { + public void printLocusInfo(Variation eval, String name, char ref, Variation chip, GenomeLoc loc, int callIndex) { if(verbosity == INFO || verbosity == ALL) { String callStr = (eval == null) ? "NoCall" : eval.getAlternateBases(); String varStr = (chip == null) ? "NoChip" : chip.getAlternateBases(); - this.print(String.format("%s %s Call: %s Chip: %s Ref: %s%n", name, loc, callStr, varStr, ref)); + this.print(String.format("%s %s Call: %s Chip: %s Ref: %s CallIndex: %d%n", name, loc, callStr, varStr, ref, callIndex)); } }