Completely refactored the Genotype Concordance module(s).

Now PooledConcordance and GenotypeConcordance inherit from the same super class (and can therefore share data structures and functionality).  Also, they now use ConcordanceTruthTable to keep track of necessary info.
GenotypeConcordance passes integration tests.
PooledConcordance needs to be finished by Chris.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1979 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-06 16:27:16 +00:00
parent d549347f25
commit 0a55fa5bb1
5 changed files with 419 additions and 199 deletions

View File

@ -0,0 +1,144 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import java.util.*;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
public abstract class ChipConcordance extends BasicVariantAnalysis {
// the array of truth tables used to store data in this analysis
private ConcordanceTruthTable[] truthTables;
// mapping from sample name to index into truthTables for the corresponding table
private Map<String, Integer> sampleToArrayIndex;
// list of all chip rods used
private ArrayList<String> rodNames = new ArrayList<String>();
public ChipConcordance(final String name, final String chipName, boolean chipNameIsFile) {
super(name);
// read sample names from file if appropriate, otherwise use the chip name
if ( chipNameIsFile ) {
List<String> sampleNames = readSampleNamesFromFile(chipName);
rodNames.addAll(sampleNames);
} else {
rodNames.add(chipName);
}
// initialize the truth table storage data
sampleToArrayIndex = new HashMap<String, Integer>();
truthTables = createTruthTableMappings(rodNames, sampleToArrayIndex);
}
private List<String> readSampleNamesFromFile(String file) {
ArrayList<String> samples = new ArrayList<String>();
try {
BufferedReader reader = new BufferedReader(new FileReader(file));
String line = reader.readLine();
while ( line != null ) {
samples.add(line);
line = reader.readLine();
}
} catch( FileNotFoundException e) {
throw new StingException("Chip file at "+file+" was not found. Please check filepath.");
} catch( IOException e) {
throw new StingException(e.getMessage());
}
return samples;
}
// these methods need to be implemented by subclasses
protected abstract ConcordanceTruthTable[] createTruthTableMappings(List<String> rodNames, Map<String, Integer> sampleToArrayIndex);
protected abstract void assertVariationIsValid(Variation eval);
protected abstract HashMap<String, Genotype> makeGenotypeHash(List<Genotype> evals, List<String> rodNames);
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
// get all of the chip rods at this locus
HashMap<String, Genotype> chips = new HashMap<String, Genotype>();
for ( String name : rodNames ) {
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData(name, null);
Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0));
if ( chip != null ) {
// chips must be Genotypes
if ( !(chip instanceof VariantBackedByGenotype) )
throw new StingException("Failure: trying to analyze genotypes using non-genotype truth data");
chips.put(name, ((VariantBackedByGenotype)chip).getCalledGenotype());
}
}
// don't procede if we have no truth data and no call
if ( eval != null || chips.size() > 0 )
inc(chips, eval, ref);
return null;
}
public void inc(Map<String, Genotype> chips, Variation eval, char ref) {
// each implementing class can decide whether the Variation is valid
assertVariationIsValid(eval);
// This shouldn't happen, but let's check anyways to be safe
if (BaseUtils.simpleBaseToBaseIndex(ref) == -1)
return;
// create a hash of samples to their Genotypes
List<Genotype> evals = (eval != null ? ((VariantBackedByGenotype)eval).getGenotypes() : new ArrayList<Genotype>());
HashMap<String, Genotype> evalHash = makeGenotypeHash(evals, rodNames);
// make chip/eval Genotype pairs
List<Pair<Genotype, Genotype>>[] chipEvals = new List[truthTables.length];
for ( String name : rodNames ) {
Genotype chipG = chips.get(name);
Genotype evalG = evalHash.get(name);
if (chipG == null && evalG == null)
continue;
int index = sampleToArrayIndex.get(name);
if ( chipEvals[index] == null )
chipEvals[index] = new ArrayList<Pair<Genotype, Genotype>>();
chipEvals[index].add(new Pair<Genotype, Genotype>(chipG, evalG));
}
// now we can finally update our truth tables with the truth vs. calls data
for (int i = 0; i < truthTables.length; i++) {
ConcordanceTruthTable table = truthTables[i];
table.addEntry(chipEvals[i], eval, ref);
}
}
public List<String> done() {
List<String> s = new ArrayList<String>();
// let the truth tables do all the printing
for ( ConcordanceTruthTable table : truthTables ) {
table.addAllStats(s);
s.add("");
}
return s;
}
}

View File

@ -0,0 +1,199 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.Pair;
import java.util.*;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
public class ConcordanceTruthTable {
private static final int TRUE_POSITIVE = 0;
private static final int TRUE_NEGATIVE = 1;
private static final int FALSE_POSITIVE = 2;
private static final int FALSE_NEGATIVE = 3;
private static final int REF = 0;
private static final int VAR_HET = 1;
private static final int VAR_HOM = 2;
private static final int UNKNOWN = 3;
private static final int NO_CALL = 3; // synonym
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"};
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"};
private String name = null;
private boolean singleSampleMode;
private int[][] table;
private int[] truth_totals;
private int[] calls_totals;
public ConcordanceTruthTable(String name) {
// there's a specific sample associated with this truth table
this.name = name;
singleSampleMode = true;
table = new int[4][4];
truth_totals = new int[4];
calls_totals = new int[4];
for (int i = 0; i < 4; i++) {
truth_totals[i] = 0;
calls_totals[i] = 0;
for (int j = 0; j < 4; j++)
table[i][j] = 0;
}
}
public ConcordanceTruthTable() {
// there's no specific sample associated with this truth table
singleSampleMode = false;
}
public void addEntry(List<Pair<Genotype, Genotype>> chipEvals, Variation eval, char ref) {
// if the table represents a single sample, then we can calculate genotype stats
if ( singleSampleMode ) {
for ( Pair<Genotype, Genotype> chipEval : chipEvals ) {
Genotype chipG = chipEval.first;
Genotype evalG = chipEval.second;
if (chipG == null && evalG == null)
continue;
int truthType = getGenotype(chipG, ref);
int callType = getGenotype(evalG, ref);
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
addGenotypeEntry(truthType, callType);
}
}
// TODO -- implement me for pooled mode with frequency stats
// TODO -- You'll want to use eval and the chips from chipEvals (these are the first members of the pair)
// TODO -- You'll also need to declare (and initialize) the relevant data arrays for the data
// TODO -- Indexes like TRUE_POSITIVE are defined above for you
}
private static int getGenotype(Genotype g, char ref) {
int type;
if ( g == null )
type = NO_CALL;
else if ( !g.isVariant(ref) )
type = REF;
else if ( g.isHet() )
type = VAR_HET;
else
type = VAR_HOM;
return type;
}
private void addGenotypeEntry(int truthIndex, int callIndex) {
table[truthIndex][callIndex]++;
truth_totals[truthIndex]++;
calls_totals[callIndex]++;
}
public void addAllStats(List<String> s) {
if ( singleSampleMode )
addGenotypeStats(s);
else
addFrequencyStats(s);
}
private void addFrequencyStats(List<String> s) {
// TODO -- implement me for pooled mode with frequency stats
}
private void addGenotypeStats(List<String> s) {
s.add(String.format("name %s", name));
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
for (int i = 0; i < 4; i++) {
StringBuffer sb = new StringBuffer();
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
for (int j = 0; j < 4; j++)
sb.append(String.format("%9d ", table[i][j]));
sb.append(String.format("%9d ", truth_totals[i]));
if (i == VAR_HET || i == VAR_HOM) {
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
} else {
sb.append("\tN/A\t\t\tN/A");
}
s.add(sb.toString());
}
addCalledGenotypeConcordance(s);
addOverallStats(s);
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
}
if (i == VAR_HET || i == VAR_HOM) {
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
}
}
}
private void addCalledGenotypeConcordance(List<String> s) {
StringBuilder sb = new StringBuilder();
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
for (int i = 0; i < 4; i++) {
int nConcordantCallsI = table[i][i];
String value = "N/A";
if (i != UNKNOWN)
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
sb.append(value);
}
s.add(sb.toString());
}
// How many overall calls where made that aren't NO_CALLS or UNKNOWNS?
private int getNCalled() {
int n = 0;
for (int i = 0; i < 4; i++)
for (int j = 0; j < 4; j++)
if (i != NO_CALL && j != NO_CALL) n += table[i][j];
return n;
}
private void addOverallStats(List<String> s) {
int nConcordantRefCalls = table[REF][REF];
int nConcordantHetCalls = table[VAR_HET][VAR_HET];
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
int nCalled = getNCalled();
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
}
private static String cellPercent(int count, int total) {
StringBuffer sb = new StringBuffer();
total = Math.max(total, 0);
sb.append(String.format("%.2f", (100.0 * count) / total));
sb.append("%");
return sb.toString();
}
}

View File

@ -1,10 +1,5 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.*;
@ -19,84 +14,35 @@ import java.util.*;
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
public class GenotypeConcordance extends ChipConcordance implements GenotypeAnalysis {
private static final int REF = 0;
private static final int VAR_HET = 1;
private static final int VAR_HOM = 2;
private static final int UNKNOWN = 3;
private static final int NO_CALL = 3; // synonym
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"};
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"};
private ArrayList<String> rodNames = new ArrayList<String>();
private HashMap<String, TruthTable> tables = new HashMap<String, TruthTable>();
public GenotypeConcordance(final String name) {
super("genotype_concordance");
rodNames.add(name);
initialize();
public GenotypeConcordance(final String chipName, boolean chipNameIsFile) {
super("genotype_concordance", chipName, chipNameIsFile);
}
public GenotypeConcordance(final List<String> names) {
super("genotype_concordance");
rodNames.addAll(names);
initialize();
}
private void initialize() {
for ( String name : rodNames )
tables.put(name, new TruthTable());
}
public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
// get all of the chip rods at this locus
HashMap<String, Genotype> chips = new HashMap<String, Genotype>();
for ( String name : rodNames ) {
RODRecordList<ReferenceOrderedDatum> rods = tracker.getTrackData(name, null);
Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0));
if ( chip != null ) {
if ( !(chip instanceof VariantBackedByGenotype) )
throw new StingException("Failure: trying to analyze genotypes using non-genotype truth data");
chips.put(name, ((VariantBackedByGenotype)chip).getCalledGenotype());
}
}
protected void assertVariationIsValid(Variation eval) {
// must be a genotype
if ( eval != null && !(eval instanceof VariantBackedByGenotype) )
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
// don't procede if we have no truth data and no call
if ( eval != null || chips.size() > 0 )
inc(chips, (eval != null ? ((VariantBackedByGenotype)eval).getGenotypes() : new ArrayList<Genotype>()), ref);
return null;
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
}
public void inc(Map<String, Genotype> chips, List<Genotype> evals, char ref) {
protected ConcordanceTruthTable[] createTruthTableMappings(List<String> rodNames, Map<String, Integer> sampleToArrayIndex) {
ConcordanceTruthTable[] tables = new ConcordanceTruthTable[rodNames.size()];
// This shouldn't happen, but let's check anyways
if (BaseUtils.simpleBaseToBaseIndex(ref) == -1)
return;
HashMap<String, Genotype> evalHash = makeGenotypeHash(evals);
for ( String name : rodNames ) {
Genotype chip = chips.get(name);
Genotype eval = evalHash.get(name);
if (chip == null && eval == null)
continue;
int truthType = getGenotypeType(chip, ref);
int callType = getGenotypeType(eval, ref);
TruthTable table = tables.get(name);
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
table.addEntry(truthType, callType);
// each sample gets its own truth table
for (int i = 0; i < rodNames.size(); i++) {
String name = rodNames.get(i);
tables[i] = new ConcordanceTruthTable(name);
sampleToArrayIndex.put(name, i);
}
return tables;
}
private HashMap<String, Genotype> makeGenotypeHash(List<Genotype> evals) {
protected HashMap<String, Genotype> makeGenotypeHash(List<Genotype> evals, List<String> rodNames) {
HashMap<String, Genotype> hash = new HashMap<String, Genotype>();
// associate each Genotype with the appropriate sample
for ( Genotype eval : evals ) {
if ( eval instanceof SampleBacked )
hash.put(((SampleBacked)eval).getSampleName(), eval);
@ -107,130 +53,4 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
}
return hash;
}
private static int getGenotypeType(Genotype g, char ref) {
int type;
if ( g == null )
type = NO_CALL;
else if ( !g.isVariant(ref) )
type = REF;
else if ( g.isHet() )
type = VAR_HET;
else
type = VAR_HOM;
return type;
}
public List<String> done() {
List<String> s = new ArrayList<String>();
for ( String name : rodNames ) {
TruthTable table = tables.get(name);
table.addAllStats(s, name);
s.add("");
}
return s;
}
private static String cellPercent(int count, int total) {
StringBuffer sb = new StringBuffer();
total = Math.max(total, 0);
sb.append(String.format("%.2f", (100.0 * count) / total));
sb.append("%");
return sb.toString();
}
private class TruthTable {
int[][] table = new int[4][4];
int[] truth_totals = new int[4];
int[] calls_totals = new int[4];
public TruthTable() {
for (int i = 0; i < 4; i++) {
truth_totals[i] = 0;
calls_totals[i] = 0;
for (int j = 0; j < 4; j++)
table[i][j] = 0;
}
}
public void addEntry(int truthIndex, int callIndex) {
table[truthIndex][callIndex]++;
truth_totals[truthIndex]++;
calls_totals[callIndex]++;
}
public void addAllStats(List<String> s, String name) {
s.add(String.format("name %s", name));
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
for (int i = 0; i < 4; i++) {
StringBuffer sb = new StringBuffer();
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
for (int j = 0; j < 4; j++)
sb.append(String.format("%9d ", table[i][j]));
sb.append(String.format("%9d ", truth_totals[i]));
if (i == VAR_HET || i == VAR_HOM) {
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
} else {
sb.append("\tN/A\t\t\tN/A");
}
s.add(sb.toString());
}
addCalledGenotypeConcordance(s);
addOverallStats(s);
for (int i = 0; i < 4; i++) {
for (int j = 0; j < 4; j++) {
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
}
if (i == VAR_HET || i == VAR_HOM) {
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM])));
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i])));
}
}
}
private void addCalledGenotypeConcordance(List<String> s) {
StringBuilder sb = new StringBuilder();
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
for (int i = 0; i < 4; i++) {
int nConcordantCallsI = table[i][i];
String value = "N/A";
if (i != UNKNOWN)
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i]));
sb.append(value);
}
s.add(sb.toString());
}
// How many overall calls where made that aren't NO_CALLS or UNKNOWNS?
private int getNCalled() {
int n = 0;
for (int i = 0; i < 4; i++)
for (int j = 0; j < 4; j++)
if (i != NO_CALL && j != NO_CALL) n += table[i][j];
return n;
}
private void addOverallStats(List<String> s) {
int nConcordantRefCalls = table[REF][REF];
int nConcordantHetCalls = table[VAR_HET][VAR_HET];
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
int nCalled = getNCalled();
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
}
}
}

View File

@ -0,0 +1,54 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.*;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
public class PooledConcordance extends ChipConcordance implements PoolAnalysis {
public PooledConcordance(final String chipName, boolean chipNameIsFile) {
super("pooled_concordance", chipName, chipNameIsFile);
}
protected void assertVariationIsValid(Variation eval) {
// we only support VCF for now
if ( eval != null && !(eval instanceof RodVCF) )
throw new StingException("Failure: we currently only support analyzing pooled data in VCF format");
}
protected ConcordanceTruthTable[] createTruthTableMappings(List<String> rodNames, Map<String, Integer> sampleToArrayIndex) {
// there is only one truth table - the pool-wide one
ConcordanceTruthTable[] tables = new ConcordanceTruthTable[1];
tables[0] = new ConcordanceTruthTable();
for ( String name : rodNames ) {
sampleToArrayIndex.put(name, 0);
}
return tables;
}
protected HashMap<String, Genotype> makeGenotypeHash(List<Genotype> evals, List<String> rodNames) {
HashMap<String, Genotype> hash = new HashMap<String, Genotype>();
// te Genotype is pool-wide so all samples are associated with it
if ( evals.size() > 0 ) {
for ( String name : rodNames )
hash.put(name, evals.get(0));
}
return hash;
}
}

View File

@ -133,7 +133,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
analyses.add(new VariantCounter());
analyses.add(new VariantDBCoverage(knownSNPDBName));
analyses.add(new PooledFrequencyAnalysis(numPeopleInPool,knownSNPDBName));
analyses.add(new GenotypeConcordance(genotypeChipName));
if ( samplesFile == null )
analyses.add(new GenotypeConcordance(genotypeChipName, false));
else
analyses.add(new GenotypeConcordance(samplesFile, true));
analyses.add(new TransitionTranversionAnalysis());
analyses.add(new NeighborDistanceAnalysis());
analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));