Changes:
Deletion: PooledGenotypeConcordanceNew Rewrite: PooledGenotypeConcordance. It works, and is blazing fast compared to the earlier version (1 order of magnitude speedup)! And is now entirely non-hackey, as opposed to before when there were some hacky bits. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1640 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3e289fcaa4
commit
8fce376792
|
|
@ -8,11 +8,8 @@ import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.*;
|
||||||
import java.io.FileReader;
|
import java.util.*;
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.StringTokenizer;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -21,171 +18,43 @@ import java.util.StringTokenizer;
|
||||||
* Time: 4:45:21 PM
|
* Time: 4:45:21 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//todo -- onTraversalDone
|
|
||||||
|
|
||||||
public class PooledGenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
public class PooledGenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||||
private String[] individuals = null;
|
|
||||||
|
|
||||||
private static final boolean DEBUG = true;
|
private PooledConcordanceTable table;
|
||||||
|
private String[] hapmapNames;
|
||||||
|
|
||||||
private static final int REF = 0;
|
public PooledGenotypeConcordance( String pathToPoolFile ) {
|
||||||
private static final int VAR_MATCH = 1;
|
super("Pooled Genotype Concordance");
|
||||||
private static final int VAR_HET = 1;
|
if( pathToPoolFile == null ) {
|
||||||
private static final int VAR_MISMATCH = 2;
|
table = null;
|
||||||
private static final int VAR_HOM = 2;
|
hapmapNames = null;
|
||||||
private static final int UNKNOWN = 3;
|
|
||||||
private static final int NO_CALL = 3; // synonym
|
|
||||||
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_SINGLE_VARIANT", "IS_MULTIPLE_VARIANT", "UNKNOWN"};
|
|
||||||
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_SINGLE_VARIANT", "CALLED_MULTIPLE_VARIANT", "NO_CALL"};
|
|
||||||
|
|
||||||
private int[][][] tableByIndividual;
|
|
||||||
private int[][] truthTotalsByIndividual;
|
|
||||||
private int[][] callTotalsByIndividual;
|
|
||||||
|
|
||||||
public PooledGenotypeConcordance(String pathToHapmapPoolFile) {
|
|
||||||
super("genotype_concordance");
|
|
||||||
if(pathToHapmapPoolFile == null) {
|
|
||||||
// do nothing
|
|
||||||
} else {
|
} else {
|
||||||
BufferedReader fileReader = null;
|
generateNameTableFromFile( pathToPoolFile );
|
||||||
try {
|
table = new PooledConcordanceTable( hapmapNames.length );
|
||||||
fileReader = new BufferedReader(new FileReader(pathToHapmapPoolFile));
|
|
||||||
} catch (IOException e) {
|
|
||||||
String errorMsg = "Could not open any file at " + pathToHapmapPoolFile + " Please check to see if the path is accurate.";
|
|
||||||
throw new StingException(errorMsg, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
generateNameTableFromFile(fileReader);
|
|
||||||
|
|
||||||
if(DEBUG) {
|
|
||||||
printAllToCheck(individuals);
|
|
||||||
}
|
|
||||||
|
|
||||||
truthTotalsByIndividual = new int[individuals.length][4];
|
|
||||||
callTotalsByIndividual = new int[individuals.length][4];
|
|
||||||
tableByIndividual = new int[individuals.length][4][4];
|
|
||||||
for(int individual = 0; individual < individuals.length; individual ++) {
|
|
||||||
for(int call1 = 0; call1 < 4; call1++) {
|
|
||||||
for(int call2 = 0; call2 < 4; call2++)
|
|
||||||
{
|
|
||||||
tableByIndividual[individual][call1][call2] = 0;
|
|
||||||
}
|
|
||||||
callTotalsByIndividual[individual][call1] = 0;
|
|
||||||
truthTotalsByIndividual[individual][call1] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context){
|
|
||||||
if(DEBUG && context != null) {
|
|
||||||
System.out.println( "Update Called at location: " + context.getLocation().toString() );
|
|
||||||
} else if (DEBUG) {
|
|
||||||
System.out.println("Update Called, but no alignment context was given.");
|
|
||||||
}
|
|
||||||
|
|
||||||
for( int nameOffset = 0; nameOffset < individuals.length; nameOffset++ ) {
|
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||||
inc(eval, tracker, ref, context, nameOffset);
|
table.updateTable(tracker, ref, eval, hapmapNames);
|
||||||
}
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<String> done() {
|
||||||
public void inc(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context, int nameOffset) {
|
return table.standardOutput();
|
||||||
Variation chip = (Variation) tracker.lookup(individuals[nameOffset],null);
|
|
||||||
if ((chip != null && !(chip instanceof VariantBackedByGenotype) || (eval != null && !(eval instanceof VariantBackedByGenotype)))) {
|
|
||||||
String errMsg = "Trying to compare non-pooled data or non-genotype data.";
|
|
||||||
throw new StingException(errMsg);
|
|
||||||
}
|
|
||||||
|
|
||||||
DiploidGenotype g = DiploidGenotype.valueOf(Utils.dupString(ref,2));
|
|
||||||
|
|
||||||
if(DEBUG) {
|
|
||||||
System.out.print(incDebugString(nameOffset, chip, eval, ref));
|
|
||||||
}
|
|
||||||
|
|
||||||
// todo -- is this how want to do pooled concordance checking?
|
|
||||||
int truthIndex, callIndex;
|
|
||||||
if( chip == null ) {
|
|
||||||
truthIndex = UNKNOWN;
|
|
||||||
} else if ( chip.getAlternateBases().equals(g.toString()) ) {
|
|
||||||
truthIndex = REF;
|
|
||||||
} else if ( chip.getAlternateBases().charAt(0) != chip.getAlternateBases().charAt(1)) {
|
|
||||||
truthIndex = VAR_HET;
|
|
||||||
} else {
|
|
||||||
truthIndex = VAR_HOM;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( eval == null ) {
|
|
||||||
callIndex = UNKNOWN;
|
|
||||||
} else if ( eval.getAlternateBases().equals(g.toString()) ) {
|
|
||||||
callIndex = REF;
|
|
||||||
} else if ( callWrongBase(eval,chip,ref) ) {
|
|
||||||
callIndex = VAR_MISMATCH;
|
|
||||||
} else {
|
|
||||||
callIndex = VAR_MATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( chip != null || eval != null ) {
|
|
||||||
tableByIndividual[nameOffset][truthIndex][callIndex]++;
|
|
||||||
truthTotalsByIndividual[nameOffset][truthIndex]++;
|
|
||||||
if ( callIndex != NO_CALL ) {
|
|
||||||
callTotalsByIndividual[nameOffset][callIndex]++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(DEBUG) {
|
|
||||||
System.out.printf("TruthIndex: %d CallIndex: %d%n", truthIndex, callIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isCorrectVariantType(Variation eval) {
|
// private methods for reading in names from a file
|
||||||
// todo -- this. Check if eval is a TypeOf some ROD class that's the right pooled call output that we
|
|
||||||
// todo -- want to deal with.
|
|
||||||
|
|
||||||
return true;
|
private void generateNameTableFromFile(String file) {
|
||||||
}
|
BufferedReader reader;
|
||||||
|
try {
|
||||||
public boolean callWrongBase(Variation eval, Variation chip, char ref) {
|
reader = new BufferedReader(new FileReader(file));
|
||||||
boolean wrongCall;
|
} catch( FileNotFoundException e) {
|
||||||
if ( chip == null ) {
|
String errMsg = "Hapmap pool file at "+file+" was not found. Please check filepath.";
|
||||||
wrongCall = true;
|
throw new StingException(errMsg, e);
|
||||||
} else if ( chip.getAlternateBases().charAt(0) == chip.getAlternateBases().charAt(1) ) { // homozygous nonref
|
|
||||||
if( eval.getAlternateBases().charAt(0) == chip.getAlternateBases().charAt(0) ||
|
|
||||||
eval.getAlternateBases().charAt(1) == chip.getAlternateBases().charAt(0) ) {
|
|
||||||
wrongCall = false;
|
|
||||||
} else {
|
|
||||||
wrongCall = true;
|
|
||||||
}
|
|
||||||
} else if ( chip.getAlternateBases().charAt(0) == ref || chip.getAlternateBases().charAt(1) == ref ) { // het nonref
|
|
||||||
wrongCall = ( getNonrefBase(eval,ref) != getNonrefBase(chip,ref) );
|
|
||||||
} else { // todo -- what do we really want to do if ref is C, but chip is AG ??
|
|
||||||
wrongCall = true;
|
|
||||||
}
|
|
||||||
return wrongCall;
|
|
||||||
}
|
|
||||||
|
|
||||||
public char getNonrefBase(Variation var, char ref) {
|
|
||||||
char nonRefBase;
|
|
||||||
if( var.getAlternateBases().charAt(0) == ref ) {
|
|
||||||
nonRefBase = var.getAlternateBases().charAt(1);
|
|
||||||
} else {
|
|
||||||
nonRefBase = var.getAlternateBases().charAt(0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return nonRefBase;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
// private methods for reading names into individualsByPool from an external file
|
|
||||||
//
|
|
||||||
|
|
||||||
private void generateNameTableFromFile(BufferedReader reader) {
|
|
||||||
LinkedList<String> nameList = new LinkedList<String>();
|
LinkedList<String> nameList = new LinkedList<String>();
|
||||||
|
|
||||||
while(continueReading(reader)) {
|
while(continueReading(reader)) {
|
||||||
|
|
@ -193,11 +62,11 @@ public class PooledGenotypeConcordance extends BasicVariantAnalysis implements G
|
||||||
nameList.add(line);
|
nameList.add(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
individuals = nameList.toArray(new String[nameList.size()]);
|
hapmapNames = nameList.toArray(new String[nameList.size()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean continueReading(BufferedReader reader) {
|
private boolean continueReading(BufferedReader reader) {
|
||||||
boolean continueReading = false;
|
boolean continueReading;
|
||||||
try {
|
try {
|
||||||
continueReading = reader.ready();
|
continueReading = reader.ready();
|
||||||
} catch(IOException e) {
|
} catch(IOException e) {
|
||||||
|
|
@ -217,32 +86,246 @@ public class PooledGenotypeConcordance extends BasicVariantAnalysis implements G
|
||||||
return line;
|
return line;
|
||||||
}
|
}
|
||||||
|
|
||||||
// code strictly for debug
|
}
|
||||||
|
|
||||||
private void printAllToCheck(String[] blah) {
|
class PooledConcordanceTable {
|
||||||
System.out.println("Checking:");
|
private final int CALL_INDECES = 5;
|
||||||
for( String s : blah) {
|
private final int TRUTH_INDECES = 4;
|
||||||
System.out.println(s);
|
private final int NO_CALL = 0;
|
||||||
}
|
private final int REF_CALL = 0; // synonym
|
||||||
}
|
private final int VARIANT_CALL_NONHAPMAP = 1;
|
||||||
|
private final int VARIANT_CALL_MATCH = 2;
|
||||||
private String incDebugString(int nameOffset, Variation chip, Variation eval, char ref) {
|
private final int VARIANT_CALL = 2; // re-using index
|
||||||
String truth;
|
private final int VARIANT_CALL_MISMATCH = 3;
|
||||||
if(chip == null) {
|
private final int VARIANT_CALL_UNKNOWN = 4;
|
||||||
truth = "NoChip";
|
private final int NO_TRUTH_DATA = 0;
|
||||||
} else {
|
private final int TRUTH_REF = 1;
|
||||||
truth = chip.getAlternateBases();
|
private final int TRUTH_VAR = 2;
|
||||||
}
|
private final int TRUTH_UNKNOWN = 3;
|
||||||
|
|
||||||
String call;
|
private int[][] table;
|
||||||
if(eval == null) {
|
private int[][][] tableByHMFrequency;
|
||||||
call = "NoCall";
|
private int variantCallsAtRefN;
|
||||||
} else {
|
private int poolSize;
|
||||||
call = eval.getAlternateBases();
|
|
||||||
}
|
public PooledConcordanceTable(int poolSize) {
|
||||||
return String.format("Person: %s Ref: %s Truth: %s Call: %s%n",
|
this.poolSize = poolSize;
|
||||||
individuals[nameOffset], Character.toString(ref), truth,
|
table = new int[TRUTH_INDECES][CALL_INDECES];
|
||||||
call);
|
tableByHMFrequency = new int[calculateNumFrequencyIndeces(poolSize)][TRUTH_INDECES][CALL_INDECES];
|
||||||
}
|
variantCallsAtRefN = 0;
|
||||||
|
|
||||||
|
for ( int j = 0; j < TRUTH_INDECES; j ++ ) {
|
||||||
|
for ( int k = 0; k < CALL_INDECES; k ++ ) {
|
||||||
|
table[j][k] = 0;
|
||||||
|
for( int i = 0; i < calculateNumFrequencyIndeces(poolSize); i ++ ) {
|
||||||
|
tableByHMFrequency[i][j][k] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void updateTable(RefMetaDataTracker tracker, char ref, Variation eval, String[] names) {
|
||||||
|
int truthIndex, callIndex, frequencyIndex;
|
||||||
|
List<Variation> chips = getChips(names, tracker);
|
||||||
|
if ( ref == 'N' || ref == 'n' ) {
|
||||||
|
variantCallsAtRefN += ( eval == null ) ? 0 : 1;
|
||||||
|
truthIndex = NO_TRUTH_DATA; // don't want calls on Ns to factor in to calculation
|
||||||
|
callIndex = NO_CALL; // don't want calls on Ns to factor in to calculation
|
||||||
|
} else if( chips.isEmpty() ) {
|
||||||
|
truthIndex = NO_TRUTH_DATA;
|
||||||
|
if ( eval == null ) {
|
||||||
|
callIndex = NO_CALL;
|
||||||
|
} else {
|
||||||
|
callIndex = ( pooledCallIsRef( eval, ref ) ) ? REF_CALL : VARIANT_CALL_NONHAPMAP;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
frequencyIndex = calcVariantFrequencyIndex(ref, chips);
|
||||||
|
|
||||||
|
if ( freqIndexToFrequency( frequencyIndex ) != 0 ) {
|
||||||
|
truthIndex = TRUTH_VAR;
|
||||||
|
} else if ( chips.size() == poolSize ) {
|
||||||
|
truthIndex = TRUTH_REF;
|
||||||
|
} else {
|
||||||
|
truthIndex = TRUTH_UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( eval == null ) {
|
||||||
|
callIndex = NO_CALL;
|
||||||
|
} else {
|
||||||
|
if ( pooledCallIsRef( eval, ref ) ) {
|
||||||
|
callIndex = REF_CALL;
|
||||||
|
} else if (truthIndex == TRUTH_REF ) {
|
||||||
|
callIndex = VARIANT_CALL;
|
||||||
|
} else if (truthIndex == TRUTH_UNKNOWN) {
|
||||||
|
callIndex = VARIANT_CALL_UNKNOWN;
|
||||||
|
} else if ( mismatchingCalls( eval, chips, ref ) ) {
|
||||||
|
callIndex = VARIANT_CALL_MISMATCH;
|
||||||
|
} else {
|
||||||
|
callIndex = VARIANT_CALL_MATCH;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tableByHMFrequency[frequencyIndex][truthIndex][callIndex] ++; // note that this updates only on HAPMAP sites. This is good.
|
||||||
|
}
|
||||||
|
|
||||||
|
table[truthIndex][callIndex] ++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<Variation> getChips(String[] names, RefMetaDataTracker tracker) {
|
||||||
|
LinkedList<Variation> chips = new LinkedList<Variation>();
|
||||||
|
for ( String name : names ) {
|
||||||
|
Variation chip = (Variation) tracker.lookup(name,null);
|
||||||
|
if ( chip != null ) {
|
||||||
|
chips.add(chip);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return chips;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean pooledCallIsRef(Variation eval, char ref) {
|
||||||
|
// code broken out for easy alteration when we start using pool-specific variations
|
||||||
|
return eval.getAlternateBases().equalsIgnoreCase((Utils.dupString(ref,2)));
|
||||||
|
}
|
||||||
|
|
||||||
|
public int calculateNumFrequencyIndeces(int poolSize) {
|
||||||
|
// code broken out for easy alteration when we start using pool-specific variations
|
||||||
|
return 2*(poolSize+1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int calcVariantFrequencyIndex( char ref, List<Variation> evals ) {
|
||||||
|
return frequencyToFrequencyIndex( calcVariantFrequency( evals, ref ) );
|
||||||
|
}
|
||||||
|
|
||||||
|
public int frequencyToFrequencyIndex( double frequency ) {
|
||||||
|
// code broken out for easy alteration when we start using pool-specific variations
|
||||||
|
return (int) frequency;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double calcVariantFrequency( List<Variation> evals, char ref ) {
|
||||||
|
// code broken out for easy alteration when we start using pool-specific variations
|
||||||
|
Variation firstEval = evals.get(0);
|
||||||
|
double alternateFrequency = 0.0;
|
||||||
|
for ( Variation eval : evals ) {
|
||||||
|
if ( mismatchingCalls(firstEval, eval, ref) ) {
|
||||||
|
// todo -- make this not a StingException but go to the log
|
||||||
|
throw new StingException("Tri-Allelic Position "+eval.getAlternateBases()+"/"+firstEval.getAlternateBases() + " Ref: "+ ref + " not supported");
|
||||||
|
} else {
|
||||||
|
alternateFrequency += calledVariantFrequency(eval,ref);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return alternateFrequency;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean mismatchingCalls(Variation var1, List<Variation> vars, char ref) {
|
||||||
|
ListIterator varIter = vars.listIterator();
|
||||||
|
try {
|
||||||
|
while( pooledCallIsRef( (Variation) varIter.next(), ref ) ) {
|
||||||
|
// don't do squat!
|
||||||
|
}
|
||||||
|
} catch (NoSuchElementException e) {
|
||||||
|
String errMsg = "Comparison data given to mismatchingCalls(Variation, List<Variation>, char) was entirely reference. This is a bug and should never happen.";
|
||||||
|
throw new StingException(errMsg, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return mismatchingCalls( var1, (Variation) varIter.previous(), ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean mismatchingCalls(Variation eval, Variation chip, char ref) {
|
||||||
|
// eval and chip guaranteed to be non-null
|
||||||
|
char chipF = chip.getAlternateBases().charAt(0);
|
||||||
|
char chipS = chip.getAlternateBases().charAt(1);
|
||||||
|
char evalF = chip.getAlternateBases().charAt(0);
|
||||||
|
char evalS = chip.getAlternateBases().charAt(1);
|
||||||
|
boolean mismatch;
|
||||||
|
if (chipF == ref) {
|
||||||
|
if ( chipS == ref ) {
|
||||||
|
mismatch = false; // no mismatch against hom ref
|
||||||
|
} else {
|
||||||
|
mismatch = ( evalF != chipS && evalS != chipS ); // test against het nonref
|
||||||
|
}
|
||||||
|
} else if ( chipS == ref ) {
|
||||||
|
mismatch = ( evalF != chipF && evalS != chipF ); // test against het nonref
|
||||||
|
} else {
|
||||||
|
if( evalF == ref ) {
|
||||||
|
mismatch = ( evalS != chipF && evalS != chipF ); // test against hom nonref
|
||||||
|
} else if ( evalS == ref ) {
|
||||||
|
mismatch = ( evalF != chipF && evalF != chipF ); // test against hom nonref
|
||||||
|
} else {
|
||||||
|
// both are hom nonref
|
||||||
|
mismatch = ( evalF != chipF );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return mismatch;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double calledVariantFrequency( Variation var, char ref ) {
|
||||||
|
// code broken out for easy alteration when we start using pool-specific variations
|
||||||
|
String varStr = var.getAlternateBases();
|
||||||
|
double freq;
|
||||||
|
if ( varStr.charAt(0) != ref && varStr.charAt(1) != ref ) {
|
||||||
|
freq = (double) 2;
|
||||||
|
} else if ( varStr.charAt(0) == ref && varStr.charAt(1) == ref ) {
|
||||||
|
freq = (double) 0;
|
||||||
|
} else {
|
||||||
|
freq = (double) 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return freq;
|
||||||
|
}
|
||||||
|
|
||||||
|
public double freqIndexToFrequency( int freqIndex ) {
|
||||||
|
// code broken out for easy alteration when we start using pool-specific variations
|
||||||
|
return (double) freqIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> standardOutput() {
|
||||||
|
LinkedList<String> out = new LinkedList<String>();
|
||||||
|
int nSNPCallSites = 0;
|
||||||
|
int nHapmapSites = 0;
|
||||||
|
int nHapmapSNPSites = 0;
|
||||||
|
int nFullHapmapRefSites = 0;
|
||||||
|
int nUnknownHapmapSites = 0;
|
||||||
|
|
||||||
|
for( int truthIndex = 0; truthIndex < TRUTH_INDECES; truthIndex ++ ) {
|
||||||
|
nSNPCallSites += table[truthIndex][VARIANT_CALL_MATCH] + table[truthIndex][VARIANT_CALL_MISMATCH] + table[truthIndex][VARIANT_CALL_NONHAPMAP];
|
||||||
|
nSNPCallSites += table[truthIndex][VARIANT_CALL_UNKNOWN];
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( int callIndex = 0; callIndex < CALL_INDECES; callIndex ++ ) {
|
||||||
|
nHapmapSites += table[TRUTH_REF][callIndex] + table[TRUTH_VAR][callIndex] + table[TRUTH_UNKNOWN][callIndex];
|
||||||
|
nUnknownHapmapSites += table[TRUTH_UNKNOWN][callIndex];
|
||||||
|
nHapmapSNPSites += table[TRUTH_VAR][callIndex];
|
||||||
|
nFullHapmapRefSites += table[TRUTH_REF][callIndex];
|
||||||
|
}
|
||||||
|
|
||||||
|
int nRefsCalledCorrectly = table[TRUTH_REF][NO_CALL];
|
||||||
|
int nRefsCalledAsSNP = table[TRUTH_REF][VARIANT_CALL];
|
||||||
|
int nSNPsCalledCorrectly = table[TRUTH_VAR][VARIANT_CALL_MATCH];
|
||||||
|
int nSNPsCalledIncorrectly = table[TRUTH_VAR][VARIANT_CALL_MISMATCH];
|
||||||
|
int nSNPsCalledAsRef = table[TRUTH_VAR][REF_CALL];
|
||||||
|
int nSNPsOnHapmapSNP = table[TRUTH_VAR][VARIANT_CALL_MATCH] + table[TRUTH_VAR][VARIANT_CALL_MISMATCH];
|
||||||
|
int nSNPsAtNonHapmap = table[NO_TRUTH_DATA][VARIANT_CALL_NONHAPMAP];
|
||||||
|
int nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals = table[TRUTH_UNKNOWN][VARIANT_CALL_UNKNOWN];
|
||||||
|
|
||||||
|
out.add(String.format("Total Number of SNP Calls: %d", nSNPCallSites));
|
||||||
|
out.add(String.format("Total SNP calls on non-Hapmap sites %d", nSNPsAtNonHapmap));
|
||||||
|
out.add(String.format("Number of Hapmap Sites: %d", nHapmapSites));
|
||||||
|
out.add("Data on Hapmap Reference Sites");
|
||||||
|
out.add(String.format("\tSites where all Hapmap chips were ref: %d", nFullHapmapRefSites));
|
||||||
|
out.add(String.format("\t\tReference sites correctly called: %d (%f)", nRefsCalledCorrectly, ((double)nRefsCalledCorrectly/nFullHapmapRefSites)));
|
||||||
|
out.add(String.format("\t\tReference sites called as variant: %d (%f)", nRefsCalledAsSNP, ((double)nRefsCalledAsSNP/nFullHapmapRefSites)));
|
||||||
|
out.add(String.format("\tSites where all seen Hapmap chips were ref, but not all Hapmap chips available: %d", nUnknownHapmapSites));
|
||||||
|
out.add(String.format("\t\tPutative reference sites called ref: %d (%f)", table[TRUTH_UNKNOWN][REF_CALL], ((double)table[TRUTH_UNKNOWN][REF_CALL]/nUnknownHapmapSites)));
|
||||||
|
out.add(String.format("\t\tPutative reference sites called SNP: %d (%f)", nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals, ((double)nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals/nUnknownHapmapSites)));
|
||||||
|
out.add("Data on Hapmap Variant Sites");
|
||||||
|
out.add(String.format("\tNumber of Hapmap SNP Sites: %d", nHapmapSNPSites));
|
||||||
|
out.add(String.format("\t\tSNP sites incorrectly called ref: %d (%f)", nSNPsCalledAsRef, ((double)nSNPsCalledAsRef/nHapmapSNPSites)));
|
||||||
|
out.add(String.format("\tSNP calls on Hapmap SNP Sites: %d", nSNPsOnHapmapSNP));
|
||||||
|
out.add(String.format("\t\tSNP sites correctly called SNP: %d (%f)", nSNPsCalledCorrectly, ((double)nSNPsCalledCorrectly/nSNPsOnHapmapSNP)));
|
||||||
|
out.add(String.format("\t\tSNP sites called a different base: %d (%f)", nSNPsCalledIncorrectly, ((double)nSNPsCalledIncorrectly/nSNPsOnHapmapSNP)));
|
||||||
|
out.add(String.format("Calls on reference N: %d", variantCallsAtRefN));
|
||||||
|
|
||||||
|
return out;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,91 +12,13 @@ import java.io.*;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
class HapmapConcordanceTableOld {
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: Ghost
|
|
||||||
* Date: Sep 15, 2009
|
|
||||||
* Time: 6:06:37 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class PooledGenotypeConcordanceNew extends BasicVariantAnalysis {
|
|
||||||
|
|
||||||
private String[] hapmapNames;
|
|
||||||
private DebugWriter debug;
|
|
||||||
private HapmapConcordanceTable table;
|
|
||||||
|
|
||||||
public PooledGenotypeConcordanceNew(String pathToPoolFile, String pathToDebugFile, int verbosity) {
|
|
||||||
super("Pooled Genotype Concordance");
|
|
||||||
if( pathToPoolFile == null ) {
|
|
||||||
debug = new DebugWriter();
|
|
||||||
} else {
|
|
||||||
if( pathToDebugFile != null ) {
|
|
||||||
debug = new DebugWriter( pathToDebugFile, verbosity );
|
|
||||||
}
|
|
||||||
generateNameTableFromFile( pathToPoolFile );
|
|
||||||
table = new HapmapConcordanceTable( hapmapNames.length );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
|
||||||
table.updateTable(tracker, ref, hapmapNames, eval, context.getLocation(), debug);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> done() {
|
|
||||||
return table.standardOutput();
|
|
||||||
}
|
|
||||||
|
|
||||||
// methods for reading pool information from file
|
|
||||||
|
|
||||||
private void generateNameTableFromFile(String file) {
|
|
||||||
BufferedReader reader;
|
|
||||||
try {
|
|
||||||
reader = new BufferedReader(new FileReader(file));
|
|
||||||
} catch( FileNotFoundException e) {
|
|
||||||
String errMsg = "Hapmap pool file at "+file+" was not found. Please check filepath.";
|
|
||||||
throw new StingException(errMsg, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
LinkedList<String> nameList = new LinkedList<String>();
|
|
||||||
|
|
||||||
while(continueReading(reader)) {
|
|
||||||
String line = readLine(reader);
|
|
||||||
nameList.add(line);
|
|
||||||
}
|
|
||||||
|
|
||||||
hapmapNames = nameList.toArray(new String[nameList.size()]);
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean continueReading(BufferedReader reader) {
|
|
||||||
boolean continueReading;
|
|
||||||
try {
|
|
||||||
continueReading = reader.ready();
|
|
||||||
} catch(IOException e) {
|
|
||||||
continueReading = false;
|
|
||||||
}
|
|
||||||
return continueReading;
|
|
||||||
}
|
|
||||||
|
|
||||||
private String readLine(BufferedReader reader) {
|
|
||||||
String line;
|
|
||||||
try {
|
|
||||||
line = reader.readLine();
|
|
||||||
} catch( IOException e) {
|
|
||||||
String errMsg = "BufferedReader pointing to "+reader.toString()+" was declared ready but no line could be read from it.";
|
|
||||||
throw new StingException(errMsg,e);
|
|
||||||
}
|
|
||||||
return line;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class HapmapConcordanceTable {
|
|
||||||
|
|
||||||
private int[][][] truthTableByName;
|
private int[][][] truthTableByName;
|
||||||
private int[][] poolCallConcordance;
|
private int[][] poolCallConcordance;
|
||||||
private int[][] callsByName;
|
private int[][] callsByName;
|
||||||
private int[][] truthByName;
|
private int[][] truthByName;
|
||||||
private int absoluteFalseNegatives;
|
private int absoluteFalsePositives;
|
||||||
private int sitesWithFullDataAndNoSNPs;
|
private int sitesWithFullDataAndNoSNPs;
|
||||||
|
|
||||||
private final int REF = 0;
|
private final int REF = 0;
|
||||||
|
|
@ -107,14 +29,15 @@ class HapmapConcordanceTable {
|
||||||
private final int MATCH = 1;
|
private final int MATCH = 1;
|
||||||
private final int MISMATCH = 2;
|
private final int MISMATCH = 2;
|
||||||
private final int NO_CALL = 3;
|
private final int NO_CALL = 3;
|
||||||
|
private final int DE_NOVO_CALL = 3; //synonym
|
||||||
private final int ONE_SNP = 0;
|
private final int ONE_SNP = 0;
|
||||||
private final int MULTI_SNP = 1;
|
private final int MULTI_SNP = 1;
|
||||||
private final int DE_NOVO = 2;
|
private final int DE_NOVO = 2;
|
||||||
private final int TRUE_INT = 1;
|
private final int TRUE_INT = 1;
|
||||||
private final int FALSE_INT = 0;
|
private final int FALSE_INT = 0;
|
||||||
|
|
||||||
public HapmapConcordanceTable( int numberOfPeople ) {
|
public HapmapConcordanceTableOld( int numberOfPeople ) {
|
||||||
absoluteFalseNegatives = 0;
|
absoluteFalsePositives = 0;
|
||||||
sitesWithFullDataAndNoSNPs = 0;
|
sitesWithFullDataAndNoSNPs = 0;
|
||||||
truthTableByName = new int[numberOfPeople][4][4];
|
truthTableByName = new int[numberOfPeople][4][4];
|
||||||
callsByName = new int[numberOfPeople][4];
|
callsByName = new int[numberOfPeople][4];
|
||||||
|
|
@ -145,12 +68,12 @@ class HapmapConcordanceTable {
|
||||||
int hapmapCoverage = 0;
|
int hapmapCoverage = 0;
|
||||||
for ( int name = 0; name < names.length; name ++ ) {
|
for ( int name = 0; name < names.length; name ++ ) {
|
||||||
Variation chip = (Variation) tracker.lookup(names[name], null);
|
Variation chip = (Variation) tracker.lookup(names[name], null);
|
||||||
debug.printLocusInfo(eval, names[name], ref, chip, loc);
|
|
||||||
SQuad<Integer> variantAndCall = update(eval, name, chip, Character.toUpperCase(ref), debug);
|
SQuad<Integer> variantAndCall = update(eval, name, chip, Character.toUpperCase(ref), debug);
|
||||||
nHapmapVariants += variantAndCall.getFirst();
|
nHapmapVariants += variantAndCall.getFirst();
|
||||||
nCorrectCalls += variantAndCall.getSecond();
|
nCorrectCalls += variantAndCall.getSecond();
|
||||||
hapmapCoverage += variantAndCall.getThird();
|
hapmapCoverage += variantAndCall.getThird();
|
||||||
updateFalseNegatives(hapmapCoverage,nHapmapVariants,variantAndCall.getFourth());
|
updateFalsePositives(hapmapCoverage,nHapmapVariants,variantAndCall.getFourth());
|
||||||
|
debug.printLocusInfo(eval, names[name], ref, chip, loc, variantAndCall.getFourth());
|
||||||
}
|
}
|
||||||
updatePoolCallConcordance(nHapmapVariants,nCorrectCalls);
|
updatePoolCallConcordance(nHapmapVariants,nCorrectCalls);
|
||||||
debug.printVerbose(String.format("%s Variants In Pool: %d Correctly Called: %d%n", loc.toString(), nHapmapVariants, nCorrectCalls));
|
debug.printVerbose(String.format("%s Variants In Pool: %d Correctly Called: %d%n", loc.toString(), nHapmapVariants, nCorrectCalls));
|
||||||
|
|
@ -187,12 +110,12 @@ class HapmapConcordanceTable {
|
||||||
poolCallConcordance[hapmapAllelesIndex][callConcordanceIndex]++;
|
poolCallConcordance[hapmapAllelesIndex][callConcordanceIndex]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateFalseNegatives(int coverage, int nHapmapVariants, int callConcordanceIndex) {
|
public void updateFalsePositives(int coverage, int nHapmapVariants, int callConcordanceIndex) {
|
||||||
if( coverage == truthByName.length ) {
|
if( coverage == truthByName.length ) {
|
||||||
if( nHapmapVariants == 0 ) {
|
if( nHapmapVariants == 0 ) {
|
||||||
if( callConcordanceIndex != NO_CALL ) {
|
if( callConcordanceIndex != NO_CALL ) {
|
||||||
if (callConcordanceIndex != NOVARIANT ) {
|
if (callConcordanceIndex != NOVARIANT ) {
|
||||||
absoluteFalseNegatives++;
|
absoluteFalsePositives++;
|
||||||
sitesWithFullDataAndNoSNPs++;
|
sitesWithFullDataAndNoSNPs++;
|
||||||
} else {
|
} else {
|
||||||
sitesWithFullDataAndNoSNPs++;
|
sitesWithFullDataAndNoSNPs++;
|
||||||
|
|
@ -225,7 +148,10 @@ class HapmapConcordanceTable {
|
||||||
if( eval == null ) {
|
if( eval == null ) {
|
||||||
callIndex = NO_CALL;
|
callIndex = NO_CALL;
|
||||||
isCorrectCallAndThereIsVariant = FALSE_INT;
|
isCorrectCallAndThereIsVariant = FALSE_INT;
|
||||||
} else if ( eval.getAlternateBases().charAt(0) == ref && chip.getAlternateBases().charAt(1) == ref ) {
|
} else if ( chip == null ) {
|
||||||
|
callIndex = DE_NOVO_CALL;
|
||||||
|
isCorrectCallAndThereIsVariant = FALSE_INT;
|
||||||
|
} else if ( eval.getAlternateBases().charAt(0) == ref && eval.getAlternateBases().charAt(1) == ref ) {
|
||||||
callIndex = NOVARIANT;
|
callIndex = NOVARIANT;
|
||||||
isCorrectCallAndThereIsVariant = FALSE_INT;
|
isCorrectCallAndThereIsVariant = FALSE_INT;
|
||||||
} else if ( callWrongBase(eval, chip, ref, debug) ) {
|
} else if ( callWrongBase(eval, chip, ref, debug) ) {
|
||||||
|
|
@ -249,6 +175,7 @@ class HapmapConcordanceTable {
|
||||||
// eval and chip guaranteed to be non-null
|
// eval and chip guaranteed to be non-null
|
||||||
char evalRef;
|
char evalRef;
|
||||||
char evalSNP;
|
char evalSNP;
|
||||||
|
|
||||||
if ( eval.getAlternateBases().charAt(0) == ref ) {
|
if ( eval.getAlternateBases().charAt(0) == ref ) {
|
||||||
evalRef = eval.getAlternateBases().charAt(0);
|
evalRef = eval.getAlternateBases().charAt(0);
|
||||||
evalSNP = eval.getAlternateBases().charAt(1);
|
evalSNP = eval.getAlternateBases().charAt(1);
|
||||||
|
|
@ -266,14 +193,32 @@ class HapmapConcordanceTable {
|
||||||
LinkedList<String> outLines = new LinkedList<String>();
|
LinkedList<String> outLines = new LinkedList<String>();
|
||||||
int numSingleSNPSites = poolCallConcordance[ONE_SNP][MATCH] + poolCallConcordance[ONE_SNP][MISMATCH];
|
int numSingleSNPSites = poolCallConcordance[ONE_SNP][MATCH] + poolCallConcordance[ONE_SNP][MISMATCH];
|
||||||
int numMultiSNPSites = poolCallConcordance[MULTI_SNP][MATCH] + poolCallConcordance[MULTI_SNP][MISMATCH];
|
int numMultiSNPSites = poolCallConcordance[MULTI_SNP][MATCH] + poolCallConcordance[MULTI_SNP][MISMATCH];
|
||||||
outLines.add(String.format("Sites with variants in One hapmap individual: %d%n", numSingleSNPSites));
|
int numSNPSites = numSingleSNPSites + numMultiSNPSites;
|
||||||
outLines.add(String.format("\tNumber called correctly: %d (%f)%n",poolCallConcordance[ONE_SNP][MATCH], ((double)poolCallConcordance[ONE_SNP][MATCH]/numSingleSNPSites)));
|
int numCorrect = poolCallConcordance[ONE_SNP][MATCH] + poolCallConcordance[MULTI_SNP][MATCH];
|
||||||
outLines.add(String.format("\tNumber called incorrectly: %d (%f)%n", poolCallConcordance[ONE_SNP][MISMATCH], ((double)poolCallConcordance[ONE_SNP][MISMATCH])/numSingleSNPSites));
|
int numIncorrect = poolCallConcordance[ONE_SNP][MISMATCH] + poolCallConcordance[MULTI_SNP][MISMATCH];
|
||||||
outLines.add(String.format("Sites with variants in multiple hapmap individuals: %d%n", numMultiSNPSites));
|
outLines.add(String.format("Number of Hapmap SNP Sites: %d", numSNPSites));
|
||||||
outLines.add(String.format("\tNumber called correctly: %d (%f)%n", poolCallConcordance[MULTI_SNP][MATCH], ((double)poolCallConcordance[MULTI_SNP][MISMATCH])/numMultiSNPSites));
|
outLines.add(String.format("\tNumber correctly called: %d (%f)", numCorrect, ((double) numCorrect)/numSNPSites));
|
||||||
outLines.add(String.format("\tNumber called incorrectly: %d (%f)%n", poolCallConcordance[MULTI_SNP][MISMATCH], ((double)poolCallConcordance[MULTI_SNP][MISMATCH])/numMultiSNPSites));
|
outLines.add(String.format("\tNumber incorrectly called: %d (%f)", numIncorrect, ((double) numIncorrect)/numSNPSites));
|
||||||
outLines.add(String.format("Number of called sites with chip data on all individuals: %d%n", sitesWithFullDataAndNoSNPs));
|
outLines.add(String.format("\tSites with variants in One hapmap individual: %d", numSingleSNPSites));
|
||||||
outLines.add(String.format("\tNumber incorrectly called SNPs: %d (%f)%n", absoluteFalseNegatives, ((double)absoluteFalseNegatives)/sitesWithFullDataAndNoSNPs));
|
outLines.add(String.format("\t\tNumber called correctly: %d (%f)",poolCallConcordance[ONE_SNP][MATCH], ((double)poolCallConcordance[ONE_SNP][MATCH]/numSingleSNPSites)));
|
||||||
|
outLines.add(String.format("\t\tNumber called incorrectly: %d (%f)", poolCallConcordance[ONE_SNP][MISMATCH], ((double)poolCallConcordance[ONE_SNP][MISMATCH])/numSingleSNPSites));
|
||||||
|
outLines.add(String.format("\tSites with variants in multiple hapmap individuals: %d", numMultiSNPSites));
|
||||||
|
outLines.add(String.format("\t\tNumber called correctly: %d (%f)", poolCallConcordance[MULTI_SNP][MATCH], ((double)poolCallConcordance[MULTI_SNP][MATCH])/numMultiSNPSites));
|
||||||
|
outLines.add(String.format("\t\tNumber called incorrectly: %d (%f)", poolCallConcordance[MULTI_SNP][MISMATCH], ((double)poolCallConcordance[MULTI_SNP][MISMATCH])/numMultiSNPSites));
|
||||||
|
outLines.add(String.format("\tNumber of called sites with homozygous reference chip data on all individuals: %d", sitesWithFullDataAndNoSNPs));
|
||||||
|
outLines.add(String.format("\t\tNumber incorrectly called SNPs: %d (%f)", absoluteFalsePositives, ((double)absoluteFalsePositives)/sitesWithFullDataAndNoSNPs));
|
||||||
|
return outLines;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> verboseOutput(String[] names) {
|
||||||
|
LinkedList<String> outLines = (LinkedList<String>) this.standardOutput();
|
||||||
|
for( int nameOffset = 0; nameOffset < names.length; nameOffset ++ ) {
|
||||||
|
outLines.add(String.format("Concordance for Hapmap individual %s:", names[nameOffset]));
|
||||||
|
int numSNPSites = truthByName[nameOffset][HET] + truthByName[nameOffset][HOM];
|
||||||
|
int numCallSites = callsByName[nameOffset][MATCH] + callsByName[nameOffset][MISMATCH];
|
||||||
|
outLines.add(String.format("hi %s", "hello"));
|
||||||
|
}
|
||||||
|
|
||||||
return outLines;
|
return outLines;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -347,11 +292,11 @@ class DebugWriter {
|
||||||
return verbosity;
|
return verbosity;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void printLocusInfo(Variation eval, String name, char ref, Variation chip, GenomeLoc loc) {
|
public void printLocusInfo(Variation eval, String name, char ref, Variation chip, GenomeLoc loc, int callIndex) {
|
||||||
if(verbosity == INFO || verbosity == ALL) {
|
if(verbosity == INFO || verbosity == ALL) {
|
||||||
String callStr = (eval == null) ? "NoCall" : eval.getAlternateBases();
|
String callStr = (eval == null) ? "NoCall" : eval.getAlternateBases();
|
||||||
String varStr = (chip == null) ? "NoChip" : chip.getAlternateBases();
|
String varStr = (chip == null) ? "NoChip" : chip.getAlternateBases();
|
||||||
this.print(String.format("%s %s Call: %s Chip: %s Ref: %s%n", name, loc, callStr, varStr, ref));
|
this.print(String.format("%s %s Call: %s Chip: %s Ref: %s CallIndex: %d%n", name, loc, callStr, varStr, ref, callIndex));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue