Deletion: PooledGenotypeConcordanceNew

Rewrite: PooledGenotypeConcordance. It works, and is blazing fast compared to the earlier version (1 order of magnitude speedup)! And is now entirely non-hackey, as opposed to before when there were some hacky bits.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1640 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-09-16 20:22:16 +00:00
parent 3e289fcaa4
commit 8fce376792
2 changed files with 311 additions and 283 deletions

View File

@ -8,11 +8,8 @@ import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.LinkedList;
import java.util.StringTokenizer;
import java.io.*;
import java.util.*;
/**
* Created by IntelliJ IDEA.
@ -21,171 +18,43 @@ import java.util.StringTokenizer;
* Time: 4:45:21 PM
* To change this template use File | Settings | File Templates.
*/
//todo -- onTraversalDone
public class PooledGenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
private String[] individuals = null;
private static final boolean DEBUG = true;
private PooledConcordanceTable table;
private String[] hapmapNames;
private static final int REF = 0;
private static final int VAR_MATCH = 1;
private static final int VAR_HET = 1;
private static final int VAR_MISMATCH = 2;
private static final int VAR_HOM = 2;
private static final int UNKNOWN = 3;
private static final int NO_CALL = 3; // synonym
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_SINGLE_VARIANT", "IS_MULTIPLE_VARIANT", "UNKNOWN"};
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_SINGLE_VARIANT", "CALLED_MULTIPLE_VARIANT", "NO_CALL"};
private int[][][] tableByIndividual;
private int[][] truthTotalsByIndividual;
private int[][] callTotalsByIndividual;
public PooledGenotypeConcordance(String pathToHapmapPoolFile) {
super("genotype_concordance");
if(pathToHapmapPoolFile == null) {
// do nothing
public PooledGenotypeConcordance( String pathToPoolFile ) {
super("Pooled Genotype Concordance");
if( pathToPoolFile == null ) {
table = null;
hapmapNames = null;
} else {
BufferedReader fileReader = null;
try {
fileReader = new BufferedReader(new FileReader(pathToHapmapPoolFile));
} catch (IOException e) {
String errorMsg = "Could not open any file at " + pathToHapmapPoolFile + " Please check to see if the path is accurate.";
throw new StingException(errorMsg, e);
}
generateNameTableFromFile(fileReader);
if(DEBUG) {
printAllToCheck(individuals);
}
truthTotalsByIndividual = new int[individuals.length][4];
callTotalsByIndividual = new int[individuals.length][4];
tableByIndividual = new int[individuals.length][4][4];
for(int individual = 0; individual < individuals.length; individual ++) {
for(int call1 = 0; call1 < 4; call1++) {
for(int call2 = 0; call2 < 4; call2++)
{
tableByIndividual[individual][call1][call2] = 0;
}
callTotalsByIndividual[individual][call1] = 0;
truthTotalsByIndividual[individual][call1] = 0;
}
}
generateNameTableFromFile( pathToPoolFile );
table = new PooledConcordanceTable( hapmapNames.length );
}
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context){
if(DEBUG && context != null) {
System.out.println( "Update Called at location: " + context.getLocation().toString() );
} else if (DEBUG) {
System.out.println("Update Called, but no alignment context was given.");
}
for( int nameOffset = 0; nameOffset < individuals.length; nameOffset++ ) {
inc(eval, tracker, ref, context, nameOffset);
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
table.updateTable(tracker, ref, eval, hapmapNames);
return null;
}
public void inc(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context, int nameOffset) {
Variation chip = (Variation) tracker.lookup(individuals[nameOffset],null);
if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype)))) {
String errMsg = "Trying to compare non-pooled data or non-genotype data.";
throw new StingException(errMsg);
}
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref,2));
if(DEBUG) {
System.out.print(incDebugString(nameOffset, chip, eval, ref));
}
// todo -- is this how want to do pooled concordance checking?
int truthIndex, callIndex;
if( chip == null ) {
truthIndex = UNKNOWN;
} else if ( chip.getAlternateBases().equals(g.toString()) ) {
truthIndex = REF;
} else if ( chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1)) {
truthIndex = VAR_HET;
} else {
truthIndex = VAR_HOM;
}
if( eval == null ) {
callIndex = UNKNOWN;
} else if ( eval.getAlternateBases().equals(g.toString()) ) {
callIndex = REF;
} else if ( callWrongBase(eval,chip,ref) ) {
callIndex = VAR_MISMATCH;
} else {
callIndex = VAR_MATCH;
}
if( chip != null || eval != null ) {
tableByIndividual[nameOffset][truthIndex][callIndex]++;
truthTotalsByIndividual[nameOffset][truthIndex]++;
if ( callIndex != NO_CALL ) {
callTotalsByIndividual[nameOffset][callIndex]++;
}
}
if(DEBUG) {
System.out.printf("TruthIndex: %d CallIndex: %d%n", truthIndex, callIndex);
}
public List<String> done() {
return table.standardOutput();
}
public boolean isCorrectVariantType(Variation eval) {
// todo -- this. Check if eval is a TypeOf some ROD class that's the right pooled call output that we
// todo -- want to deal with.
// private methods for reading in names from a file
return true;
}
public boolean callWrongBase(Variation eval, Variation chip, char ref) {
boolean wrongCall;
if ( chip == null ) {
wrongCall = true;
} else if ( chip.getAlternateBases().charAt(0) == chip.getAlternateBases().charAt(1) ) { // homozygous nonref
if( eval.getAlternateBases().charAt(0) == chip.getAlternateBases().charAt(0) ||
eval.getAlternateBases().charAt(1) == chip.getAlternateBases().charAt(0) ) {
wrongCall = false;
} else {
wrongCall = true;
}
} else if ( chip.getAlternateBases().charAt(0) == ref || chip.getAlternateBases().charAt(1) == ref ) { // het nonref
wrongCall = ( getNonrefBase(eval,ref) != getNonrefBase(chip,ref) );
} else { // todo -- what do we really want to do if ref is C, but chip is AG ??
wrongCall = true;
}
return wrongCall;
}
public char getNonrefBase(Variation var, char ref) {
char nonRefBase;
if( var.getAlternateBases().charAt(0) == ref ) {
nonRefBase = var.getAlternateBases().charAt(1);
} else {
nonRefBase = var.getAlternateBases().charAt(0);
private void generateNameTableFromFile(String file) {
BufferedReader reader;
try {
reader = new BufferedReader(new FileReader(file));
} catch( FileNotFoundException e) {
String errMsg = "Hapmap pool file at "+file+" was not found. Please check filepath.";
throw new StingException(errMsg, e);
}
return nonRefBase;
}
//
// private methods for reading names into individualsByPool from an external file
//
private void generateNameTableFromFile(BufferedReader reader) {
LinkedList<String> nameList = new LinkedList<String>();
while(continueReading(reader)) {
@ -193,11 +62,11 @@ public class PooledGenotypeConcordance extends BasicVariantAnalysis implements G
nameList.add(line);
}
individuals = nameList.toArray(new String[nameList.size()]);
hapmapNames = nameList.toArray(new String[nameList.size()]);
}
private boolean continueReading(BufferedReader reader) {
boolean continueReading = false;
boolean continueReading;
try {
continueReading = reader.ready();
} catch(IOException e) {
@ -217,32 +86,246 @@ public class PooledGenotypeConcordance extends BasicVariantAnalysis implements G
return line;
}
// code strictly for debug
private void printAllToCheck(String[] blah) {
System.out.println("Checking:");
for( String s : blah) {
System.out.println(s);
}
}
private String incDebugString(int nameOffset, Variation chip, Variation eval, char ref) {
String truth;
if(chip == null) {
truth = "NoChip";
} else {
truth = chip.getAlternateBases();
}
String call;
if(eval == null) {
call = "NoCall";
} else {
call = eval.getAlternateBases();
}
return String.format("Person: %s Ref: %s Truth: %s Call: %s%n",
individuals[nameOffset], Character.toString(ref), truth,
call);
}
}
class PooledConcordanceTable {
private final int CALL_INDECES = 5;
private final int TRUTH_INDECES = 4;
private final int NO_CALL = 0;
private final int REF_CALL = 0; // synonym
private final int VARIANT_CALL_NONHAPMAP = 1;
private final int VARIANT_CALL_MATCH = 2;
private final int VARIANT_CALL = 2; // re-using index
private final int VARIANT_CALL_MISMATCH = 3;
private final int VARIANT_CALL_UNKNOWN = 4;
private final int NO_TRUTH_DATA = 0;
private final int TRUTH_REF = 1;
private final int TRUTH_VAR = 2;
private final int TRUTH_UNKNOWN = 3;
private int[][] table;
private int[][][] tableByHMFrequency;
private int variantCallsAtRefN;
private int poolSize;
public PooledConcordanceTable(int poolSize) {
this.poolSize = poolSize;
table = new int[TRUTH_INDECES][CALL_INDECES];
tableByHMFrequency = new int[calculateNumFrequencyIndeces(poolSize)][TRUTH_INDECES][CALL_INDECES];
variantCallsAtRefN = 0;
for ( int j = 0; j < TRUTH_INDECES; j ++ ) {
for ( int k = 0; k < CALL_INDECES; k ++ ) {
table[j][k] = 0;
for( int i = 0; i < calculateNumFrequencyIndeces(poolSize); i ++ ) {
tableByHMFrequency[i][j][k] = 0;
}
}
}
}
public void updateTable(RefMetaDataTracker tracker, char ref, Variation eval, String[] names) {
int truthIndex, callIndex, frequencyIndex;
List<Variation> chips = getChips(names, tracker);
if ( ref == 'N' || ref == 'n' ) {
variantCallsAtRefN += ( eval == null ) ? 0 : 1;
truthIndex = NO_TRUTH_DATA; // don't want calls on Ns to factor in to calculation
callIndex = NO_CALL; // don't want calls on Ns to factor in to calculation
} else if( chips.isEmpty() ) {
truthIndex = NO_TRUTH_DATA;
if ( eval == null ) {
callIndex = NO_CALL;
} else {
callIndex = ( pooledCallIsRef( eval, ref ) ) ? REF_CALL : VARIANT_CALL_NONHAPMAP;
}
} else {
frequencyIndex = calcVariantFrequencyIndex(ref, chips);
if ( freqIndexToFrequency( frequencyIndex ) != 0 ) {
truthIndex = TRUTH_VAR;
} else if ( chips.size() == poolSize ) {
truthIndex = TRUTH_REF;
} else {
truthIndex = TRUTH_UNKNOWN;
}
if ( eval == null ) {
callIndex = NO_CALL;
} else {
if ( pooledCallIsRef( eval, ref ) ) {
callIndex = REF_CALL;
} else if (truthIndex == TRUTH_REF ) {
callIndex = VARIANT_CALL;
} else if (truthIndex == TRUTH_UNKNOWN) {
callIndex = VARIANT_CALL_UNKNOWN;
} else if ( mismatchingCalls( eval, chips, ref ) ) {
callIndex = VARIANT_CALL_MISMATCH;
} else {
callIndex = VARIANT_CALL_MATCH;
}
}
tableByHMFrequency[frequencyIndex][truthIndex][callIndex] ++; // note that this updates only on HAPMAP sites. This is good.
}
table[truthIndex][callIndex] ++;
}
public List<Variation> getChips(String[] names, RefMetaDataTracker tracker) {
LinkedList<Variation> chips = new LinkedList<Variation>();
for ( String name : names ) {
Variation chip = (Variation) tracker.lookup(name,null);
if ( chip != null ) {
chips.add(chip);
}
}
return chips;
}
public boolean pooledCallIsRef(Variation eval, char ref) {
// code broken out for easy alteration when we start using pool-specific variations
return eval.getAlternateBases().equalsIgnoreCase((Utils.dupString(ref,2)));
}
public int calculateNumFrequencyIndeces(int poolSize) {
// code broken out for easy alteration when we start using pool-specific variations
return 2*(poolSize+1);
}
public int calcVariantFrequencyIndex( char ref, List<Variation> evals ) {
return frequencyToFrequencyIndex( calcVariantFrequency( evals, ref ) );
}
public int frequencyToFrequencyIndex( double frequency ) {
// code broken out for easy alteration when we start using pool-specific variations
return (int) frequency;
}
public double calcVariantFrequency( List<Variation> evals, char ref ) {
// code broken out for easy alteration when we start using pool-specific variations
Variation firstEval = evals.get(0);
double alternateFrequency = 0.0;
for ( Variation eval : evals ) {
if ( mismatchingCalls(firstEval, eval, ref) ) {
// todo -- make this not a StingException but go to the log
throw new StingException("Tri-Allelic Position "+eval.getAlternateBases()+"/"+firstEval.getAlternateBases() + " Ref: "+ ref + " not supported");
} else {
alternateFrequency += calledVariantFrequency(eval,ref);
}
}
return alternateFrequency;
}
public boolean mismatchingCalls(Variation var1, List<Variation> vars, char ref) {
ListIterator varIter = vars.listIterator();
try {
while( pooledCallIsRef( (Variation) varIter.next(), ref ) ) {
// don't do squat!
}
} catch (NoSuchElementException e) {
String errMsg = "Comparison data given to mismatchingCalls(Variation, List<Variation>, char) was entirely reference. This is a bug and should never happen.";
throw new StingException(errMsg, e);
}
return mismatchingCalls( var1, (Variation) varIter.previous(), ref);
}
public boolean mismatchingCalls(Variation eval, Variation chip, char ref) {
// eval and chip guaranteed to be non-null
char chipF = chip.getAlternateBases().charAt(0);
char chipS = chip.getAlternateBases().charAt(1);
char evalF = chip.getAlternateBases().charAt(0);
char evalS = chip.getAlternateBases().charAt(1);
boolean mismatch;
if (chipF == ref) {
if ( chipS == ref ) {
mismatch = false; // no mismatch against hom ref
} else {
mismatch = ( evalF != chipS && evalS != chipS ); // test against het nonref
}
} else if ( chipS == ref ) {
mismatch = ( evalF != chipF && evalS != chipF ); // test against het nonref
} else {
if( evalF == ref ) {
mismatch = ( evalS != chipF && evalS != chipF ); // test against hom nonref
} else if ( evalS == ref ) {
mismatch = ( evalF != chipF && evalF != chipF ); // test against hom nonref
} else {
// both are hom nonref
mismatch = ( evalF != chipF );
}
}
return mismatch;
}
public double calledVariantFrequency( Variation var, char ref ) {
// code broken out for easy alteration when we start using pool-specific variations
String varStr = var.getAlternateBases();
double freq;
if ( varStr.charAt(0) != ref && varStr.charAt(1) != ref ) {
freq = (double) 2;
} else if ( varStr.charAt(0) == ref && varStr.charAt(1) == ref ) {
freq = (double) 0;
} else {
freq = (double) 1;
}
return freq;
}
public double freqIndexToFrequency( int freqIndex ) {
// code broken out for easy alteration when we start using pool-specific variations
return (double) freqIndex;
}
public List<String> standardOutput() {
LinkedList<String> out = new LinkedList<String>();
int nSNPCallSites = 0;
int nHapmapSites = 0;
int nHapmapSNPSites = 0;
int nFullHapmapRefSites = 0;
int nUnknownHapmapSites = 0;
for( int truthIndex = 0; truthIndex < TRUTH_INDECES; truthIndex ++ ) {
nSNPCallSites += table[truthIndex][VARIANT_CALL_MATCH] + table[truthIndex][VARIANT_CALL_MISMATCH] + table[truthIndex][VARIANT_CALL_NONHAPMAP];
nSNPCallSites += table[truthIndex][VARIANT_CALL_UNKNOWN];
}
for ( int callIndex = 0; callIndex < CALL_INDECES; callIndex ++ ) {
nHapmapSites += table[TRUTH_REF][callIndex] + table[TRUTH_VAR][callIndex] + table[TRUTH_UNKNOWN][callIndex];
nUnknownHapmapSites += table[TRUTH_UNKNOWN][callIndex];
nHapmapSNPSites += table[TRUTH_VAR][callIndex];
nFullHapmapRefSites += table[TRUTH_REF][callIndex];
}
int nRefsCalledCorrectly = table[TRUTH_REF][NO_CALL];
int nRefsCalledAsSNP = table[TRUTH_REF][VARIANT_CALL];
int nSNPsCalledCorrectly = table[TRUTH_VAR][VARIANT_CALL_MATCH];
int nSNPsCalledIncorrectly = table[TRUTH_VAR][VARIANT_CALL_MISMATCH];
int nSNPsCalledAsRef = table[TRUTH_VAR][REF_CALL];
int nSNPsOnHapmapSNP = table[TRUTH_VAR][VARIANT_CALL_MATCH] + table[TRUTH_VAR][VARIANT_CALL_MISMATCH];
int nSNPsAtNonHapmap = table[NO_TRUTH_DATA][VARIANT_CALL_NONHAPMAP];
int nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals = table[TRUTH_UNKNOWN][VARIANT_CALL_UNKNOWN];
out.add(String.format("Total Number of SNP Calls: %d", nSNPCallSites));
out.add(String.format("Total SNP calls on non-Hapmap sites %d", nSNPsAtNonHapmap));
out.add(String.format("Number of Hapmap Sites: %d", nHapmapSites));
out.add("Data on Hapmap Reference Sites");
out.add(String.format("\tSites where all Hapmap chips were ref: %d", nFullHapmapRefSites));
out.add(String.format("\t\tReference sites correctly called: %d (%f)", nRefsCalledCorrectly, ((double)nRefsCalledCorrectly/nFullHapmapRefSites)));
out.add(String.format("\t\tReference sites called as variant: %d (%f)", nRefsCalledAsSNP, ((double)nRefsCalledAsSNP/nFullHapmapRefSites)));
out.add(String.format("\tSites where all seen Hapmap chips were ref, but not all Hapmap chips available: %d", nUnknownHapmapSites));
out.add(String.format("\t\tPutative reference sites called ref: %d (%f)", table[TRUTH_UNKNOWN][REF_CALL], ((double)table[TRUTH_UNKNOWN][REF_CALL]/nUnknownHapmapSites)));
out.add(String.format("\t\tPutative reference sites called SNP: %d (%f)", nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals, ((double)nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals/nUnknownHapmapSites)));
out.add("Data on Hapmap Variant Sites");
out.add(String.format("\tNumber of Hapmap SNP Sites: %d", nHapmapSNPSites));
out.add(String.format("\t\tSNP sites incorrectly called ref: %d (%f)", nSNPsCalledAsRef, ((double)nSNPsCalledAsRef/nHapmapSNPSites)));
out.add(String.format("\tSNP calls on Hapmap SNP Sites: %d", nSNPsOnHapmapSNP));
out.add(String.format("\t\tSNP sites correctly called SNP: %d (%f)", nSNPsCalledCorrectly, ((double)nSNPsCalledCorrectly/nSNPsOnHapmapSNP)));
out.add(String.format("\t\tSNP sites called a different base: %d (%f)", nSNPsCalledIncorrectly, ((double)nSNPsCalledIncorrectly/nSNPsOnHapmapSNP)));
out.add(String.format("Calls on reference N: %d", variantCallsAtRefN));
return out;
}
}

View File

@ -12,91 +12,13 @@ import java.io.*;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: Ghost
* Date: Sep 15, 2009
* Time: 6:06:37 PM
* To change this template use File | Settings | File Templates.
*/
public class PooledGenotypeConcordanceNew extends BasicVariantAnalysis {
private String[] hapmapNames;
private DebugWriter debug;
private HapmapConcordanceTable table;
public PooledGenotypeConcordanceNew(String pathToPoolFile, String pathToDebugFile, int verbosity) {
super("Pooled Genotype Concordance");
if( pathToPoolFile == null ) {
debug = new DebugWriter();
} else {
if( pathToDebugFile != null ) {
debug = new DebugWriter( pathToDebugFile, verbosity );
}
generateNameTableFromFile( pathToPoolFile );
table = new HapmapConcordanceTable( hapmapNames.length );
}
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
table.updateTable(tracker, ref, hapmapNames, eval, context.getLocation(), debug);
return null;
}
public List<String> done() {
return table.standardOutput();
}
// methods for reading pool information from file
private void generateNameTableFromFile(String file) {
BufferedReader reader;
try {
reader = new BufferedReader(new FileReader(file));
} catch( FileNotFoundException e) {
String errMsg = "Hapmap pool file at "+file+" was not found. Please check filepath.";
throw new StingException(errMsg, e);
}
LinkedList<String> nameList = new LinkedList<String>();
while(continueReading(reader)) {
String line = readLine(reader);
nameList.add(line);
}
hapmapNames = nameList.toArray(new String[nameList.size()]);
}
private boolean continueReading(BufferedReader reader) {
boolean continueReading;
try {
continueReading = reader.ready();
} catch(IOException e) {
continueReading = false;
}
return continueReading;
}
private String readLine(BufferedReader reader) {
String line;
try {
line = reader.readLine();
} catch( IOException e) {
String errMsg = "BufferedReader pointing to "+reader.toString()+" was declared ready but no line could be read from it.";
throw new StingException(errMsg,e);
}
return line;
}
}
class HapmapConcordanceTable {
class HapmapConcordanceTableOld {
private int[][][] truthTableByName;
private int[][] poolCallConcordance;
private int[][] callsByName;
private int[][] truthByName;
private int absoluteFalseNegatives;
private int absoluteFalsePositives;
private int sitesWithFullDataAndNoSNPs;
private final int REF = 0;
@ -107,14 +29,15 @@ class HapmapConcordanceTable {
private final int MATCH = 1;
private final int MISMATCH = 2;
private final int NO_CALL = 3;
private final int DE_NOVO_CALL = 3; //synonym
private final int ONE_SNP = 0;
private final int MULTI_SNP = 1;
private final int DE_NOVO = 2;
private final int TRUE_INT = 1;
private final int FALSE_INT = 0;
public HapmapConcordanceTable( int numberOfPeople ) {
absoluteFalseNegatives = 0;
public HapmapConcordanceTableOld( int numberOfPeople ) {
absoluteFalsePositives = 0;
sitesWithFullDataAndNoSNPs = 0;
truthTableByName = new int[numberOfPeople][4][4];
callsByName = new int[numberOfPeople][4];
@ -145,12 +68,12 @@ class HapmapConcordanceTable {
int hapmapCoverage = 0;
for ( int name = 0; name < names.length; name ++ ) {
Variation chip = (Variation) tracker.lookup(names[name], null);
debug.printLocusInfo(eval, names[name], ref, chip, loc);
SQuad<Integer> variantAndCall = update(eval, name, chip, Character.toUpperCase(ref), debug);
nHapmapVariants += variantAndCall.getFirst();
nCorrectCalls += variantAndCall.getSecond();
hapmapCoverage += variantAndCall.getThird();
updateFalseNegatives(hapmapCoverage,nHapmapVariants,variantAndCall.getFourth());
updateFalsePositives(hapmapCoverage,nHapmapVariants,variantAndCall.getFourth());
debug.printLocusInfo(eval, names[name], ref, chip, loc, variantAndCall.getFourth());
}
updatePoolCallConcordance(nHapmapVariants,nCorrectCalls);
debug.printVerbose(String.format("%s Variants In Pool: %d Correctly Called: %d%n", loc.toString(), nHapmapVariants, nCorrectCalls));
@ -187,12 +110,12 @@ class HapmapConcordanceTable {
poolCallConcordance[hapmapAllelesIndex][callConcordanceIndex]++;
}
public void updateFalseNegatives(int coverage, int nHapmapVariants, int callConcordanceIndex) {
public void updateFalsePositives(int coverage, int nHapmapVariants, int callConcordanceIndex) {
if( coverage == truthByName.length ) {
if( nHapmapVariants == 0 ) {
if( nHapmapVariants == 0 ) {
if( callConcordanceIndex != NO_CALL ) {
if (callConcordanceIndex != NOVARIANT ) {
absoluteFalseNegatives++;
absoluteFalsePositives++;
sitesWithFullDataAndNoSNPs++;
} else {
sitesWithFullDataAndNoSNPs++;
@ -225,7 +148,10 @@ class HapmapConcordanceTable {
if( eval == null ) {
callIndex = NO_CALL;
isCorrectCallAndThereIsVariant = FALSE_INT;
} else if ( eval.getAlternateBases().charAt(0) == ref && chip.getAlternateBases().charAt(1) == ref ) {
} else if ( chip == null ) {
callIndex = DE_NOVO_CALL;
isCorrectCallAndThereIsVariant = FALSE_INT;
} else if ( eval.getAlternateBases().charAt(0) == ref && eval.getAlternateBases().charAt(1) == ref ) {
callIndex = NOVARIANT;
isCorrectCallAndThereIsVariant = FALSE_INT;
} else if ( callWrongBase(eval, chip, ref, debug) ) {
@ -249,6 +175,7 @@ class HapmapConcordanceTable {
// eval and chip guaranteed to be non-null
char evalRef;
char evalSNP;
if ( eval.getAlternateBases().charAt(0) == ref ) {
evalRef = eval.getAlternateBases().charAt(0);
evalSNP = eval.getAlternateBases().charAt(1);
@ -266,14 +193,32 @@ class HapmapConcordanceTable {
LinkedList<String> outLines = new LinkedList<String>();
int numSingleSNPSites = poolCallConcordance[ONE_SNP][MATCH] + poolCallConcordance[ONE_SNP][MISMATCH];
int numMultiSNPSites = poolCallConcordance[MULTI_SNP][MATCH] + poolCallConcordance[MULTI_SNP][MISMATCH];
outLines.add(String.format("Sites with variants in One hapmap individual: %d%n", numSingleSNPSites));
outLines.add(String.format("\tNumber called correctly: %d (%f)%n",poolCallConcordance[ONE_SNP][MATCH], ((double)poolCallConcordance[ONE_SNP][MATCH]/numSingleSNPSites)));
outLines.add(String.format("\tNumber called incorrectly: %d (%f)%n", poolCallConcordance[ONE_SNP][MISMATCH], ((double)poolCallConcordance[ONE_SNP][MISMATCH])/numSingleSNPSites));
outLines.add(String.format("Sites with variants in multiple hapmap individuals: %d%n", numMultiSNPSites));
outLines.add(String.format("\tNumber called correctly: %d (%f)%n", poolCallConcordance[MULTI_SNP][MATCH], ((double)poolCallConcordance[MULTI_SNP][MISMATCH])/numMultiSNPSites));
outLines.add(String.format("\tNumber called incorrectly: %d (%f)%n", poolCallConcordance[MULTI_SNP][MISMATCH], ((double)poolCallConcordance[MULTI_SNP][MISMATCH])/numMultiSNPSites));
outLines.add(String.format("Number of called sites with chip data on all individuals: %d%n", sitesWithFullDataAndNoSNPs));
outLines.add(String.format("\tNumber incorrectly called SNPs: %d (%f)%n", absoluteFalseNegatives, ((double)absoluteFalseNegatives)/sitesWithFullDataAndNoSNPs));
int numSNPSites = numSingleSNPSites + numMultiSNPSites;
int numCorrect = poolCallConcordance[ONE_SNP][MATCH] + poolCallConcordance[MULTI_SNP][MATCH];
int numIncorrect = poolCallConcordance[ONE_SNP][MISMATCH] + poolCallConcordance[MULTI_SNP][MISMATCH];
outLines.add(String.format("Number of Hapmap SNP Sites: %d", numSNPSites));
outLines.add(String.format("\tNumber correctly called: %d (%f)", numCorrect, ((double) numCorrect)/numSNPSites));
outLines.add(String.format("\tNumber incorrectly called: %d (%f)", numIncorrect, ((double) numIncorrect)/numSNPSites));
outLines.add(String.format("\tSites with variants in One hapmap individual: %d", numSingleSNPSites));
outLines.add(String.format("\t\tNumber called correctly: %d (%f)",poolCallConcordance[ONE_SNP][MATCH], ((double)poolCallConcordance[ONE_SNP][MATCH]/numSingleSNPSites)));
outLines.add(String.format("\t\tNumber called incorrectly: %d (%f)", poolCallConcordance[ONE_SNP][MISMATCH], ((double)poolCallConcordance[ONE_SNP][MISMATCH])/numSingleSNPSites));
outLines.add(String.format("\tSites with variants in multiple hapmap individuals: %d", numMultiSNPSites));
outLines.add(String.format("\t\tNumber called correctly: %d (%f)", poolCallConcordance[MULTI_SNP][MATCH], ((double)poolCallConcordance[MULTI_SNP][MATCH])/numMultiSNPSites));
outLines.add(String.format("\t\tNumber called incorrectly: %d (%f)", poolCallConcordance[MULTI_SNP][MISMATCH], ((double)poolCallConcordance[MULTI_SNP][MISMATCH])/numMultiSNPSites));
outLines.add(String.format("\tNumber of called sites with homozygous reference chip data on all individuals: %d", sitesWithFullDataAndNoSNPs));
outLines.add(String.format("\t\tNumber incorrectly called SNPs: %d (%f)", absoluteFalsePositives, ((double)absoluteFalsePositives)/sitesWithFullDataAndNoSNPs));
return outLines;
}
public List<String> verboseOutput(String[] names) {
LinkedList<String> outLines = (LinkedList<String>) this.standardOutput();
for( int nameOffset = 0; nameOffset < names.length; nameOffset ++ ) {
outLines.add(String.format("Concordance for Hapmap individual %s:", names[nameOffset]));
int numSNPSites = truthByName[nameOffset][HET] + truthByName[nameOffset][HOM];
int numCallSites = callsByName[nameOffset][MATCH] + callsByName[nameOffset][MISMATCH];
outLines.add(String.format("hi %s", "hello"));
}
return outLines;
}
@ -347,11 +292,11 @@ class DebugWriter {
return verbosity;
}
public void printLocusInfo(Variation eval, String name, char ref, Variation chip, GenomeLoc loc) {
public void printLocusInfo(Variation eval, String name, char ref, Variation chip, GenomeLoc loc, int callIndex) {
if(verbosity == INFO || verbosity == ALL) {
String callStr = (eval == null) ? "NoCall" : eval.getAlternateBases();
String varStr = (chip == null) ? "NoChip" : chip.getAlternateBases();
this.print(String.format("%s %s Call: %s Chip: %s Ref: %s%n", name, loc, callStr, varStr, ref));
this.print(String.format("%s %s Call: %s Chip: %s Ref: %s CallIndex: %d%n", name, loc, callStr, varStr, ref, callIndex));
}
}