Completely refactored the Genotype Concordance module(s).
Now PooledConcordance and GenotypeConcordance inherit from the same super class (and can therefore share data structures and functionality). Also, they now use ConcordanceTruthTable to keep track of necessary info. GenotypeConcordance passes integration tests. PooledConcordance needs to be finished by Chris. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1979 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d549347f25
commit
0a55fa5bb1
|
|
@ -0,0 +1,144 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
public abstract class ChipConcordance extends BasicVariantAnalysis {
|
||||
|
||||
// the array of truth tables used to store data in this analysis
|
||||
private ConcordanceTruthTable[] truthTables;
|
||||
|
||||
// mapping from sample name to index into truthTables for the corresponding table
|
||||
private Map<String, Integer> sampleToArrayIndex;
|
||||
|
||||
// list of all chip rods used
|
||||
private ArrayList<String> rodNames = new ArrayList<String>();
|
||||
|
||||
public ChipConcordance(final String name, final String chipName, boolean chipNameIsFile) {
|
||||
super(name);
|
||||
|
||||
// read sample names from file if appropriate, otherwise use the chip name
|
||||
if ( chipNameIsFile ) {
|
||||
List<String> sampleNames = readSampleNamesFromFile(chipName);
|
||||
rodNames.addAll(sampleNames);
|
||||
} else {
|
||||
rodNames.add(chipName);
|
||||
}
|
||||
|
||||
// initialize the truth table storage data
|
||||
sampleToArrayIndex = new HashMap<String, Integer>();
|
||||
truthTables = createTruthTableMappings(rodNames, sampleToArrayIndex);
|
||||
}
|
||||
|
||||
private List<String> readSampleNamesFromFile(String file) {
|
||||
ArrayList<String> samples = new ArrayList<String>();
|
||||
|
||||
try {
|
||||
BufferedReader reader = new BufferedReader(new FileReader(file));
|
||||
String line = reader.readLine();
|
||||
while ( line != null ) {
|
||||
samples.add(line);
|
||||
line = reader.readLine();
|
||||
}
|
||||
} catch( FileNotFoundException e) {
|
||||
throw new StingException("Chip file at "+file+" was not found. Please check filepath.");
|
||||
} catch( IOException e) {
|
||||
throw new StingException(e.getMessage());
|
||||
}
|
||||
|
||||
return samples;
|
||||
}
|
||||
|
||||
// these methods need to be implemented by subclasses
|
||||
protected abstract ConcordanceTruthTable[] createTruthTableMappings(List<String> rodNames, Map<String, Integer> sampleToArrayIndex);
|
||||
protected abstract void assertVariationIsValid(Variation eval);
|
||||
protected abstract HashMap<String, Genotype> makeGenotypeHash(List<Genotype> evals, List<String> rodNames);
|
||||
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
// get all of the chip rods at this locus
|
||||
HashMap<String, Genotype> chips = new HashMap<String, Genotype>();
|
||||
for ( String name : rodNames ) {
|
||||
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData(name, null);
|
||||
Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0));
|
||||
if ( chip != null ) {
|
||||
// chips must be Genotypes
|
||||
if ( !(chip instanceof VariantBackedByGenotype) )
|
||||
throw new StingException("Failure: trying to analyze genotypes using non-genotype truth data");
|
||||
chips.put(name, ((VariantBackedByGenotype)chip).getCalledGenotype());
|
||||
}
|
||||
}
|
||||
|
||||
// don't procede if we have no truth data and no call
|
||||
if ( eval != null || chips.size() > 0 )
|
||||
inc(chips, eval, ref);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public void inc(Map<String, Genotype> chips, Variation eval, char ref) {
|
||||
|
||||
// each implementing class can decide whether the Variation is valid
|
||||
assertVariationIsValid(eval);
|
||||
|
||||
// This shouldn't happen, but let's check anyways to be safe
|
||||
if (BaseUtils.simpleBaseToBaseIndex(ref) == -1)
|
||||
return;
|
||||
|
||||
// create a hash of samples to their Genotypes
|
||||
List<Genotype> evals = (eval != null ? ((VariantBackedByGenotype)eval).getGenotypes() : new ArrayList<Genotype>());
|
||||
HashMap<String, Genotype> evalHash = makeGenotypeHash(evals, rodNames);
|
||||
|
||||
// make chip/eval Genotype pairs
|
||||
List<Pair<Genotype, Genotype>>[] chipEvals = new List[truthTables.length];
|
||||
for ( String name : rodNames ) {
|
||||
Genotype chipG = chips.get(name);
|
||||
Genotype evalG = evalHash.get(name);
|
||||
|
||||
if (chipG == null && evalG == null)
|
||||
continue;
|
||||
|
||||
int index = sampleToArrayIndex.get(name);
|
||||
if ( chipEvals[index] == null )
|
||||
chipEvals[index] = new ArrayList<Pair<Genotype, Genotype>>();
|
||||
chipEvals[index].add(new Pair<Genotype, Genotype>(chipG, evalG));
|
||||
}
|
||||
|
||||
// now we can finally update our truth tables with the truth vs. calls data
|
||||
for (int i = 0; i < truthTables.length; i++) {
|
||||
ConcordanceTruthTable table = truthTables[i];
|
||||
table.addEntry(chipEvals[i], eval, ref);
|
||||
}
|
||||
}
|
||||
|
||||
public List<String> done() {
|
||||
List<String> s = new ArrayList<String>();
|
||||
|
||||
// let the truth tables do all the printing
|
||||
for ( ConcordanceTruthTable table : truthTables ) {
|
||||
table.addAllStats(s);
|
||||
s.add("");
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,199 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class ConcordanceTruthTable {
|
||||
|
||||
private static final int TRUE_POSITIVE = 0;
|
||||
private static final int TRUE_NEGATIVE = 1;
|
||||
private static final int FALSE_POSITIVE = 2;
|
||||
private static final int FALSE_NEGATIVE = 3;
|
||||
|
||||
private static final int REF = 0;
|
||||
private static final int VAR_HET = 1;
|
||||
private static final int VAR_HOM = 2;
|
||||
private static final int UNKNOWN = 3;
|
||||
private static final int NO_CALL = 3; // synonym
|
||||
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"};
|
||||
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"};
|
||||
|
||||
private String name = null;
|
||||
private boolean singleSampleMode;
|
||||
|
||||
private int[][] table;
|
||||
private int[] truth_totals;
|
||||
private int[] calls_totals;
|
||||
|
||||
public ConcordanceTruthTable(String name) {
|
||||
// there's a specific sample associated with this truth table
|
||||
this.name = name;
|
||||
singleSampleMode = true;
|
||||
|
||||
table = new int[4][4];
|
||||
truth_totals = new int[4];
|
||||
calls_totals = new int[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
truth_totals[i] = 0;
|
||||
calls_totals[i] = 0;
|
||||
for (int j = 0; j < 4; j++)
|
||||
table[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public ConcordanceTruthTable() {
|
||||
// there's no specific sample associated with this truth table
|
||||
singleSampleMode = false;
|
||||
}
|
||||
|
||||
public void addEntry(List<Pair<Genotype, Genotype>> chipEvals, Variation eval, char ref) {
|
||||
|
||||
// if the table represents a single sample, then we can calculate genotype stats
|
||||
if ( singleSampleMode ) {
|
||||
for ( Pair<Genotype, Genotype> chipEval : chipEvals ) {
|
||||
|
||||
Genotype chipG = chipEval.first;
|
||||
Genotype evalG = chipEval.second;
|
||||
|
||||
if (chipG == null && evalG == null)
|
||||
continue;
|
||||
|
||||
int truthType = getGenotype(chipG, ref);
|
||||
int callType = getGenotype(evalG, ref);
|
||||
|
||||
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
|
||||
addGenotypeEntry(truthType, callType);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO -- implement me for pooled mode with frequency stats
|
||||
// TODO -- You'll want to use eval and the chips from chipEvals (these are the first members of the pair)
|
||||
// TODO -- You'll also need to declare (and initialize) the relevant data arrays for the data
|
||||
// TODO -- Indexes like TRUE_POSITIVE are defined above for you
|
||||
}
|
||||
|
||||
private static int getGenotype(Genotype g, char ref) {
|
||||
int type;
|
||||
|
||||
if ( g == null )
|
||||
type = NO_CALL;
|
||||
else if ( !g.isVariant(ref) )
|
||||
type = REF;
|
||||
else if ( g.isHet() )
|
||||
type = VAR_HET;
|
||||
else
|
||||
type = VAR_HOM;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
private void addGenotypeEntry(int truthIndex, int callIndex) {
|
||||
table[truthIndex][callIndex]++;
|
||||
truth_totals[truthIndex]++;
|
||||
calls_totals[callIndex]++;
|
||||
}
|
||||
|
||||
public void addAllStats(List<String> s) {
|
||||
if ( singleSampleMode )
|
||||
addGenotypeStats(s);
|
||||
else
|
||||
addFrequencyStats(s);
|
||||
}
|
||||
|
||||
private void addFrequencyStats(List<String> s) {
|
||||
|
||||
// TODO -- implement me for pooled mode with frequency stats
|
||||
|
||||
}
|
||||
|
||||
private void addGenotypeStats(List<String> s) {
|
||||
s.add(String.format("name %s", name));
|
||||
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
|
||||
for (int i = 0; i < 4; i++) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
|
||||
for (int j = 0; j < 4; j++)
|
||||
sb.append(String.format("%9d ", table[i][j]));
|
||||
sb.append(String.format("%9d ", truth_totals[i]));
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
} else {
|
||||
sb.append("\tN/A\t\t\tN/A");
|
||||
}
|
||||
s.add(sb.toString());
|
||||
}
|
||||
|
||||
addCalledGenotypeConcordance(s);
|
||||
addOverallStats(s);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
|
||||
}
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addCalledGenotypeConcordance(List<String> s) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int nConcordantCallsI = table[i][i];
|
||||
String value = "N/A";
|
||||
if (i != UNKNOWN)
|
||||
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
|
||||
sb.append(value);
|
||||
}
|
||||
s.add(sb.toString());
|
||||
}
|
||||
|
||||
// How many overall calls where made that aren't NO_CALLS or UNKNOWNS?
|
||||
private int getNCalled() {
|
||||
int n = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
for (int j = 0; j < 4; j++)
|
||||
if (i != NO_CALL && j != NO_CALL) n += table[i][j];
|
||||
return n;
|
||||
}
|
||||
|
||||
private void addOverallStats(List<String> s) {
|
||||
int nConcordantRefCalls = table[REF][REF];
|
||||
int nConcordantHetCalls = table[VAR_HET][VAR_HET];
|
||||
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
|
||||
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
|
||||
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
|
||||
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
|
||||
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
|
||||
int nCalled = getNCalled();
|
||||
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
|
||||
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
|
||||
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
|
||||
}
|
||||
|
||||
private static String cellPercent(int count, int total) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
total = Math.max(total, 0);
|
||||
sb.append(String.format("%.2f", (100.0 * count) / total));
|
||||
sb.append("%");
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,10 +1,5 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
|
|
@ -19,84 +14,35 @@ import java.util.*;
|
|||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||
public class GenotypeConcordance extends ChipConcordance implements GenotypeAnalysis {
|
||||
|
||||
private static final int REF = 0;
|
||||
private static final int VAR_HET = 1;
|
||||
private static final int VAR_HOM = 2;
|
||||
private static final int UNKNOWN = 3;
|
||||
private static final int NO_CALL = 3; // synonym
|
||||
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"};
|
||||
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"};
|
||||
|
||||
private ArrayList<String> rodNames = new ArrayList<String>();
|
||||
private HashMap<String, TruthTable> tables = new HashMap<String, TruthTable>();
|
||||
|
||||
public GenotypeConcordance(final String name) {
|
||||
super("genotype_concordance");
|
||||
rodNames.add(name);
|
||||
initialize();
|
||||
public GenotypeConcordance(final String chipName, boolean chipNameIsFile) {
|
||||
super("genotype_concordance", chipName, chipNameIsFile);
|
||||
}
|
||||
|
||||
public GenotypeConcordance(final List<String> names) {
|
||||
super("genotype_concordance");
|
||||
rodNames.addAll(names);
|
||||
initialize();
|
||||
}
|
||||
|
||||
private void initialize() {
|
||||
for ( String name : rodNames )
|
||||
tables.put(name, new TruthTable());
|
||||
}
|
||||
|
||||
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
|
||||
// get all of the chip rods at this locus
|
||||
HashMap<String, Genotype> chips = new HashMap<String, Genotype>();
|
||||
for ( String name : rodNames ) {
|
||||
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData(name, null);
|
||||
Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0));
|
||||
if ( chip != null ) {
|
||||
if ( !(chip instanceof VariantBackedByGenotype) )
|
||||
throw new StingException("Failure: trying to analyze genotypes using non-genotype truth data");
|
||||
chips.put(name, ((VariantBackedByGenotype)chip).getCalledGenotype());
|
||||
}
|
||||
}
|
||||
|
||||
protected void assertVariationIsValid(Variation eval) {
|
||||
// must be a genotype
|
||||
if ( eval != null && !(eval instanceof VariantBackedByGenotype) )
|
||||
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
||||
|
||||
// don't procede if we have no truth data and no call
|
||||
if ( eval != null || chips.size() > 0 )
|
||||
inc(chips, (eval != null ? ((VariantBackedByGenotype)eval).getGenotypes() : new ArrayList<Genotype>()), ref);
|
||||
return null;
|
||||
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
||||
}
|
||||
|
||||
public void inc(Map<String, Genotype> chips, List<Genotype> evals, char ref) {
|
||||
protected ConcordanceTruthTable[] createTruthTableMappings(List<String> rodNames, Map<String, Integer> sampleToArrayIndex) {
|
||||
ConcordanceTruthTable[] tables = new ConcordanceTruthTable[rodNames.size()];
|
||||
|
||||
// This shouldn't happen, but let's check anyways
|
||||
if (BaseUtils.simpleBaseToBaseIndex(ref) == -1)
|
||||
return;
|
||||
|
||||
HashMap<String, Genotype> evalHash = makeGenotypeHash(evals);
|
||||
|
||||
for ( String name : rodNames ) {
|
||||
Genotype chip = chips.get(name);
|
||||
Genotype eval = evalHash.get(name);
|
||||
|
||||
if (chip == null && eval == null)
|
||||
continue;
|
||||
|
||||
int truthType = getGenotypeType(chip, ref);
|
||||
int callType = getGenotypeType(eval, ref);
|
||||
TruthTable table = tables.get(name);
|
||||
|
||||
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
|
||||
table.addEntry(truthType, callType);
|
||||
// each sample gets its own truth table
|
||||
for (int i = 0; i < rodNames.size(); i++) {
|
||||
String name = rodNames.get(i);
|
||||
tables[i] = new ConcordanceTruthTable(name);
|
||||
sampleToArrayIndex.put(name, i);
|
||||
}
|
||||
|
||||
return tables;
|
||||
}
|
||||
|
||||
private HashMap<String, Genotype> makeGenotypeHash(List<Genotype> evals) {
|
||||
protected HashMap<String, Genotype> makeGenotypeHash(List<Genotype> evals, List<String> rodNames) {
|
||||
HashMap<String, Genotype> hash = new HashMap<String, Genotype>();
|
||||
|
||||
// associate each Genotype with the appropriate sample
|
||||
for ( Genotype eval : evals ) {
|
||||
if ( eval instanceof SampleBacked )
|
||||
hash.put(((SampleBacked)eval).getSampleName(), eval);
|
||||
|
|
@ -107,130 +53,4 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
|||
}
|
||||
return hash;
|
||||
}
|
||||
|
||||
private static int getGenotypeType(Genotype g, char ref) {
|
||||
int type;
|
||||
|
||||
if ( g == null )
|
||||
type = NO_CALL;
|
||||
else if ( !g.isVariant(ref) )
|
||||
type = REF;
|
||||
else if ( g.isHet() )
|
||||
type = VAR_HET;
|
||||
else
|
||||
type = VAR_HOM;
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
public List<String> done() {
|
||||
List<String> s = new ArrayList<String>();
|
||||
|
||||
for ( String name : rodNames ) {
|
||||
TruthTable table = tables.get(name);
|
||||
table.addAllStats(s, name);
|
||||
s.add("");
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
private static String cellPercent(int count, int total) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
total = Math.max(total, 0);
|
||||
sb.append(String.format("%.2f", (100.0 * count) / total));
|
||||
sb.append("%");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private class TruthTable {
|
||||
int[][] table = new int[4][4];
|
||||
int[] truth_totals = new int[4];
|
||||
int[] calls_totals = new int[4];
|
||||
|
||||
public TruthTable() {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
truth_totals[i] = 0;
|
||||
calls_totals[i] = 0;
|
||||
for (int j = 0; j < 4; j++)
|
||||
table[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
public void addEntry(int truthIndex, int callIndex) {
|
||||
table[truthIndex][callIndex]++;
|
||||
truth_totals[truthIndex]++;
|
||||
calls_totals[callIndex]++;
|
||||
}
|
||||
|
||||
public void addAllStats(List<String> s, String name) {
|
||||
s.add(String.format("name %s", name));
|
||||
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
|
||||
for (int i = 0; i < 4; i++) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
|
||||
for (int j = 0; j < 4; j++)
|
||||
sb.append(String.format("%9d ", table[i][j]));
|
||||
sb.append(String.format("%9d ", truth_totals[i]));
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
} else {
|
||||
sb.append("\tN/A\t\t\tN/A");
|
||||
}
|
||||
s.add(sb.toString());
|
||||
}
|
||||
|
||||
addCalledGenotypeConcordance(s);
|
||||
addOverallStats(s);
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = 0; j < 4; j++) {
|
||||
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
|
||||
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
|
||||
}
|
||||
if (i == VAR_HET || i == VAR_HOM) {
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
|
||||
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addCalledGenotypeConcordance(List<String> s) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
|
||||
for (int i = 0; i < 4; i++) {
|
||||
int nConcordantCallsI = table[i][i];
|
||||
String value = "N/A";
|
||||
if (i != UNKNOWN)
|
||||
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
|
||||
sb.append(value);
|
||||
}
|
||||
s.add(sb.toString());
|
||||
}
|
||||
|
||||
// How many overall calls where made that aren't NO_CALLS or UNKNOWNS?
|
||||
private int getNCalled() {
|
||||
int n = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
for (int j = 0; j < 4; j++)
|
||||
if (i != NO_CALL && j != NO_CALL) n += table[i][j];
|
||||
return n;
|
||||
}
|
||||
|
||||
private void addOverallStats(List<String> s) {
|
||||
int nConcordantRefCalls = table[REF][REF];
|
||||
int nConcordantHetCalls = table[VAR_HET][VAR_HET];
|
||||
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
|
||||
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
|
||||
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
|
||||
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
|
||||
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
|
||||
int nCalled = getNCalled();
|
||||
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
|
||||
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
|
||||
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,54 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RodVCF;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
public class PooledConcordance extends ChipConcordance implements PoolAnalysis {
|
||||
|
||||
public PooledConcordance(final String chipName, boolean chipNameIsFile) {
|
||||
super("pooled_concordance", chipName, chipNameIsFile);
|
||||
}
|
||||
|
||||
protected void assertVariationIsValid(Variation eval) {
|
||||
// we only support VCF for now
|
||||
if ( eval != null && !(eval instanceof RodVCF) )
|
||||
throw new StingException("Failure: we currently only support analyzing pooled data in VCF format");
|
||||
}
|
||||
|
||||
protected ConcordanceTruthTable[] createTruthTableMappings(List<String> rodNames, Map<String, Integer> sampleToArrayIndex) {
|
||||
// there is only one truth table - the pool-wide one
|
||||
ConcordanceTruthTable[] tables = new ConcordanceTruthTable[1];
|
||||
tables[0] = new ConcordanceTruthTable();
|
||||
|
||||
for ( String name : rodNames ) {
|
||||
sampleToArrayIndex.put(name, 0);
|
||||
}
|
||||
|
||||
return tables;
|
||||
}
|
||||
|
||||
protected HashMap<String, Genotype> makeGenotypeHash(List<Genotype> evals, List<String> rodNames) {
|
||||
|
||||
HashMap<String, Genotype> hash = new HashMap<String, Genotype>();
|
||||
|
||||
// te Genotype is pool-wide so all samples are associated with it
|
||||
if ( evals.size() > 0 ) {
|
||||
for ( String name : rodNames )
|
||||
hash.put(name, evals.get(0));
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
}
|
||||
|
|
@ -133,7 +133,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
analyses.add(new VariantCounter());
|
||||
analyses.add(new VariantDBCoverage(knownSNPDBName));
|
||||
analyses.add(new PooledFrequencyAnalysis(numPeopleInPool,knownSNPDBName));
|
||||
analyses.add(new GenotypeConcordance(genotypeChipName));
|
||||
if ( samplesFile == null )
|
||||
analyses.add(new GenotypeConcordance(genotypeChipName, false));
|
||||
else
|
||||
analyses.add(new GenotypeConcordance(samplesFile, true));
|
||||
analyses.add(new TransitionTranversionAnalysis());
|
||||
analyses.add(new NeighborDistanceAnalysis());
|
||||
analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));
|
||||
|
|
|
|||
Loading…
Reference in New Issue