diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ChipConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ChipConcordance.java new file mode 100755 index 000000000..1049786ba --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ChipConcordance.java @@ -0,0 +1,144 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; +import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; + +import java.util.*; +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.FileNotFoundException; +import java.io.IOException; + +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + * + * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + * + */ +public abstract class ChipConcordance extends BasicVariantAnalysis { + + // the array of truth tables used to store data in this analysis + private ConcordanceTruthTable[] truthTables; + + // mapping from sample name to index into truthTables for the corresponding table + private Map sampleToArrayIndex; + + // list of all chip rods used + private ArrayList rodNames = new ArrayList(); + + public ChipConcordance(final String name, final String chipName, boolean chipNameIsFile) { + super(name); + + // read sample names from file if appropriate, otherwise use the chip name + if ( chipNameIsFile ) { + List sampleNames = readSampleNamesFromFile(chipName); + rodNames.addAll(sampleNames); + } else { + rodNames.add(chipName); + } + + // initialize the truth table storage data + sampleToArrayIndex = new HashMap(); + truthTables = createTruthTableMappings(rodNames, sampleToArrayIndex); + } + + private List readSampleNamesFromFile(String file) { + ArrayList samples = new ArrayList(); + + try { + BufferedReader reader = new BufferedReader(new FileReader(file)); + String line = reader.readLine(); + while ( line != null ) { + samples.add(line); + line = reader.readLine(); + } + } catch( FileNotFoundException e) { + throw new StingException("Chip file at "+file+" was not found. Please check filepath."); + } catch( IOException e) { + throw new StingException(e.getMessage()); + } + + return samples; + } + + // these methods need to be implemented by subclasses + protected abstract ConcordanceTruthTable[] createTruthTableMappings(List rodNames, Map sampleToArrayIndex); + protected abstract void assertVariationIsValid(Variation eval); + protected abstract HashMap makeGenotypeHash(List evals, List rodNames); + + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + // get all of the chip rods at this locus + HashMap chips = new HashMap(); + for ( String name : rodNames ) { + RODRecordList rods = tracker.getTrackData(name, null); + Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0)); + if ( chip != null ) { + // chips must be Genotypes + if ( !(chip instanceof VariantBackedByGenotype) ) + throw new StingException("Failure: trying to analyze genotypes using non-genotype truth data"); + chips.put(name, ((VariantBackedByGenotype)chip).getCalledGenotype()); + } + } + + // don't procede if we have no truth data and no call + if ( eval != null || chips.size() > 0 ) + inc(chips, eval, ref); + + return null; + } + + public void inc(Map chips, Variation eval, char ref) { + + // each implementing class can decide whether the Variation is valid + assertVariationIsValid(eval); + + // This shouldn't happen, but let's check anyways to be safe + if (BaseUtils.simpleBaseToBaseIndex(ref) == -1) + return; + + // create a hash of samples to their Genotypes + List evals = (eval != null ? ((VariantBackedByGenotype)eval).getGenotypes() : new ArrayList()); + HashMap evalHash = makeGenotypeHash(evals, rodNames); + + // make chip/eval Genotype pairs + List>[] chipEvals = new List[truthTables.length]; + for ( String name : rodNames ) { + Genotype chipG = chips.get(name); + Genotype evalG = evalHash.get(name); + + if (chipG == null && evalG == null) + continue; + + int index = sampleToArrayIndex.get(name); + if ( chipEvals[index] == null ) + chipEvals[index] = new ArrayList>(); + chipEvals[index].add(new Pair(chipG, evalG)); + } + + // now we can finally update our truth tables with the truth vs. calls data + for (int i = 0; i < truthTables.length; i++) { + ConcordanceTruthTable table = truthTables[i]; + table.addEntry(chipEvals[i], eval, ref); + } + } + + public List done() { + List s = new ArrayList(); + + // let the truth tables do all the printing + for ( ConcordanceTruthTable table : truthTables ) { + table.addAllStats(s); + s.add(""); + } + + return s; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java new file mode 100755 index 000000000..ce30cd3df --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ConcordanceTruthTable.java @@ -0,0 +1,199 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + + +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.Pair; + +import java.util.*; + +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +public class ConcordanceTruthTable { + + private static final int TRUE_POSITIVE = 0; + private static final int TRUE_NEGATIVE = 1; + private static final int FALSE_POSITIVE = 2; + private static final int FALSE_NEGATIVE = 3; + + private static final int REF = 0; + private static final int VAR_HET = 1; + private static final int VAR_HOM = 2; + private static final int UNKNOWN = 3; + private static final int NO_CALL = 3; // synonym + private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"}; + private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"}; + + private String name = null; + private boolean singleSampleMode; + + private int[][] table; + private int[] truth_totals; + private int[] calls_totals; + + public ConcordanceTruthTable(String name) { + // there's a specific sample associated with this truth table + this.name = name; + singleSampleMode = true; + + table = new int[4][4]; + truth_totals = new int[4]; + calls_totals = new int[4]; + for (int i = 0; i < 4; i++) { + truth_totals[i] = 0; + calls_totals[i] = 0; + for (int j = 0; j < 4; j++) + table[i][j] = 0; + } + } + + public ConcordanceTruthTable() { + // there's no specific sample associated with this truth table + singleSampleMode = false; + } + + public void addEntry(List> chipEvals, Variation eval, char ref) { + + // if the table represents a single sample, then we can calculate genotype stats + if ( singleSampleMode ) { + for ( Pair chipEval : chipEvals ) { + + Genotype chipG = chipEval.first; + Genotype evalG = chipEval.second; + + if (chipG == null && evalG == null) + continue; + + int truthType = getGenotype(chipG, ref); + int callType = getGenotype(evalG, ref); + + //System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval); + addGenotypeEntry(truthType, callType); + } + } + + // TODO -- implement me for pooled mode with frequency stats + // TODO -- You'll want to use eval and the chips from chipEvals (these are the first members of the pair) + // TODO -- You'll also need to declare (and initialize) the relevant data arrays for the data + // TODO -- Indexes like TRUE_POSITIVE are defined above for you + } + + private static int getGenotype(Genotype g, char ref) { + int type; + + if ( g == null ) + type = NO_CALL; + else if ( !g.isVariant(ref) ) + type = REF; + else if ( g.isHet() ) + type = VAR_HET; + else + type = VAR_HOM; + + return type; + } + + private void addGenotypeEntry(int truthIndex, int callIndex) { + table[truthIndex][callIndex]++; + truth_totals[truthIndex]++; + calls_totals[callIndex]++; + } + + public void addAllStats(List s) { + if ( singleSampleMode ) + addGenotypeStats(s); + else + addFrequencyStats(s); + } + + private void addFrequencyStats(List s) { + + // TODO -- implement me for pooled mode with frequency stats + + } + + private void addGenotypeStats(List s) { + s.add(String.format("name %s", name)); + s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY")); + for (int i = 0; i < 4; i++) { + StringBuffer sb = new StringBuffer(); + sb.append(String.format("%15s ", TRUTH_NAMES[i])); + for (int j = 0; j < 4; j++) + sb.append(String.format("%9d ", table[i][j])); + sb.append(String.format("%9d ", truth_totals[i])); + if (i == VAR_HET || i == VAR_HOM) { + sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); + sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); + } else { + sb.append("\tN/A\t\t\tN/A"); + } + s.add(sb.toString()); + } + + addCalledGenotypeConcordance(s); + addOverallStats(s); + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j])); + s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i]))); + s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j]))); + } + if (i == VAR_HET || i == VAR_HOM) { + s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); + s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); + } + } + } + + private void addCalledGenotypeConcordance(List s) { + StringBuilder sb = new StringBuilder(); + sb.append("CALLED_GENOTYPE_CONCORDANCE\t"); + for (int i = 0; i < 4; i++) { + int nConcordantCallsI = table[i][i]; + String value = "N/A"; + if (i != UNKNOWN) + value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i])); + sb.append(value); + } + s.add(sb.toString()); + } + + // How many overall calls where made that aren't NO_CALLS or UNKNOWNS? + private int getNCalled() { + int n = 0; + for (int i = 0; i < 4; i++) + for (int j = 0; j < 4; j++) + if (i != NO_CALL && j != NO_CALL) n += table[i][j]; + return n; + } + + private void addOverallStats(List s) { + int nConcordantRefCalls = table[REF][REF]; + int nConcordantHetCalls = table[VAR_HET][VAR_HET]; + int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM]; + int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM]; + int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls; + int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls; + int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM]; + int nCalled = getNCalled(); + s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar))); + s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls))); + s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled))); + } + + private static String cellPercent(int count, int total) { + StringBuffer sb = new StringBuffer(); + total = Math.max(total, 0); + sb.append(String.format("%.2f", (100.0 * count) / total)); + sb.append("%"); + return sb.toString(); + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java index 853df213f..7a760e392 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/GenotypeConcordance.java @@ -1,10 +1,5 @@ package org.broadinstitute.sting.playground.gatk.walkers.varianteval; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; -import org.broadinstitute.sting.gatk.refdata.RODRecordList; -import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.*; @@ -19,84 +14,35 @@ import java.util.*; * This software is supplied without any warranty or guaranteed support whatsoever. Neither * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. */ -public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis { +public class GenotypeConcordance extends ChipConcordance implements GenotypeAnalysis { - private static final int REF = 0; - private static final int VAR_HET = 1; - private static final int VAR_HOM = 2; - private static final int UNKNOWN = 3; - private static final int NO_CALL = 3; // synonym - private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"}; - private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"}; - - private ArrayList rodNames = new ArrayList(); - private HashMap tables = new HashMap(); - - public GenotypeConcordance(final String name) { - super("genotype_concordance"); - rodNames.add(name); - initialize(); + public GenotypeConcordance(final String chipName, boolean chipNameIsFile) { + super("genotype_concordance", chipName, chipNameIsFile); } - public GenotypeConcordance(final List names) { - super("genotype_concordance"); - rodNames.addAll(names); - initialize(); - } - - private void initialize() { - for ( String name : rodNames ) - tables.put(name, new TruthTable()); - } - - public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { - // get all of the chip rods at this locus - HashMap chips = new HashMap(); - for ( String name : rodNames ) { - RODRecordList rods = tracker.getTrackData(name, null); - Variation chip = (rods == null ? null : (Variation)rods.getRecords().get(0)); - if ( chip != null ) { - if ( !(chip instanceof VariantBackedByGenotype) ) - throw new StingException("Failure: trying to analyze genotypes using non-genotype truth data"); - chips.put(name, ((VariantBackedByGenotype)chip).getCalledGenotype()); - } - } - + protected void assertVariationIsValid(Variation eval) { + // must be a genotype if ( eval != null && !(eval instanceof VariantBackedByGenotype) ) - throw new StingException("Failure: trying to analyze genotypes of non-genotype data"); - - // don't procede if we have no truth data and no call - if ( eval != null || chips.size() > 0 ) - inc(chips, (eval != null ? ((VariantBackedByGenotype)eval).getGenotypes() : new ArrayList()), ref); - return null; + throw new StingException("Failure: trying to analyze genotypes of non-genotype data"); } - public void inc(Map chips, List evals, char ref) { + protected ConcordanceTruthTable[] createTruthTableMappings(List rodNames, Map sampleToArrayIndex) { + ConcordanceTruthTable[] tables = new ConcordanceTruthTable[rodNames.size()]; - // This shouldn't happen, but let's check anyways - if (BaseUtils.simpleBaseToBaseIndex(ref) == -1) - return; - - HashMap evalHash = makeGenotypeHash(evals); - - for ( String name : rodNames ) { - Genotype chip = chips.get(name); - Genotype eval = evalHash.get(name); - - if (chip == null && eval == null) - continue; - - int truthType = getGenotypeType(chip, ref); - int callType = getGenotypeType(eval, ref); - TruthTable table = tables.get(name); - - //System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval); - table.addEntry(truthType, callType); + // each sample gets its own truth table + for (int i = 0; i < rodNames.size(); i++) { + String name = rodNames.get(i); + tables[i] = new ConcordanceTruthTable(name); + sampleToArrayIndex.put(name, i); } + + return tables; } - private HashMap makeGenotypeHash(List evals) { + protected HashMap makeGenotypeHash(List evals, List rodNames) { HashMap hash = new HashMap(); + + // associate each Genotype with the appropriate sample for ( Genotype eval : evals ) { if ( eval instanceof SampleBacked ) hash.put(((SampleBacked)eval).getSampleName(), eval); @@ -107,130 +53,4 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp } return hash; } - - private static int getGenotypeType(Genotype g, char ref) { - int type; - - if ( g == null ) - type = NO_CALL; - else if ( !g.isVariant(ref) ) - type = REF; - else if ( g.isHet() ) - type = VAR_HET; - else - type = VAR_HOM; - - return type; - } - - public List done() { - List s = new ArrayList(); - - for ( String name : rodNames ) { - TruthTable table = tables.get(name); - table.addAllStats(s, name); - s.add(""); - } - - return s; - } - - private static String cellPercent(int count, int total) { - StringBuffer sb = new StringBuffer(); - total = Math.max(total, 0); - sb.append(String.format("%.2f", (100.0 * count) / total)); - sb.append("%"); - return sb.toString(); - } - - private class TruthTable { - int[][] table = new int[4][4]; - int[] truth_totals = new int[4]; - int[] calls_totals = new int[4]; - - public TruthTable() { - for (int i = 0; i < 4; i++) { - truth_totals[i] = 0; - calls_totals[i] = 0; - for (int j = 0; j < 4; j++) - table[i][j] = 0; - } - } - - public void addEntry(int truthIndex, int callIndex) { - table[truthIndex][callIndex]++; - truth_totals[truthIndex]++; - calls_totals[callIndex]++; - } - - public void addAllStats(List s, String name) { - s.add(String.format("name %s", name)); - s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY")); - for (int i = 0; i < 4; i++) { - StringBuffer sb = new StringBuffer(); - sb.append(String.format("%15s ", TRUTH_NAMES[i])); - for (int j = 0; j < 4; j++) - sb.append(String.format("%9d ", table[i][j])); - sb.append(String.format("%9d ", truth_totals[i])); - if (i == VAR_HET || i == VAR_HOM) { - sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); - sb.append(String.format("%s", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); - } else { - sb.append("\tN/A\t\t\tN/A"); - } - s.add(sb.toString()); - } - - addCalledGenotypeConcordance(s); - addOverallStats(s); - - for (int i = 0; i < 4; i++) { - for (int j = 0; j < 4; j++) { - s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j])); - s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i]))); - s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j]))); - } - if (i == VAR_HET || i == VAR_HOM) { - s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF] + table[i][VAR_HET] + table[i][VAR_HOM]))); - s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i] - table[i][NO_CALL], truth_totals[i]))); - } - } - } - - private void addCalledGenotypeConcordance(List s) { - StringBuilder sb = new StringBuilder(); - sb.append("CALLED_GENOTYPE_CONCORDANCE\t"); - for (int i = 0; i < 4; i++) { - int nConcordantCallsI = table[i][i]; - String value = "N/A"; - if (i != UNKNOWN) - value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i] - table[UNKNOWN][i])); - sb.append(value); - } - s.add(sb.toString()); - } - - // How many overall calls where made that aren't NO_CALLS or UNKNOWNS? - private int getNCalled() { - int n = 0; - for (int i = 0; i < 4; i++) - for (int j = 0; j < 4; j++) - if (i != NO_CALL && j != NO_CALL) n += table[i][j]; - return n; - } - - private void addOverallStats(List s) { - int nConcordantRefCalls = table[REF][REF]; - int nConcordantHetCalls = table[VAR_HET][VAR_HET]; - int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM]; - int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM]; - int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls; - int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls; - int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM]; - int nCalled = getNCalled(); - s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar))); - s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls))); - s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled))); - } - } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledConcordance.java new file mode 100755 index 000000000..bf967a6aa --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledConcordance.java @@ -0,0 +1,54 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + +import org.broadinstitute.sting.gatk.refdata.RodVCF; +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.*; + +import java.util.*; + +/** + * The Broad Institute + * SOFTWARE COPYRIGHT NOTICE AGREEMENT + * This software and its documentation are copyright 2009 by the + * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. + *

+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither + * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality. + */ +public class PooledConcordance extends ChipConcordance implements PoolAnalysis { + + public PooledConcordance(final String chipName, boolean chipNameIsFile) { + super("pooled_concordance", chipName, chipNameIsFile); + } + + protected void assertVariationIsValid(Variation eval) { + // we only support VCF for now + if ( eval != null && !(eval instanceof RodVCF) ) + throw new StingException("Failure: we currently only support analyzing pooled data in VCF format"); + } + + protected ConcordanceTruthTable[] createTruthTableMappings(List rodNames, Map sampleToArrayIndex) { + // there is only one truth table - the pool-wide one + ConcordanceTruthTable[] tables = new ConcordanceTruthTable[1]; + tables[0] = new ConcordanceTruthTable(); + + for ( String name : rodNames ) { + sampleToArrayIndex.put(name, 0); + } + + return tables; + } + + protected HashMap makeGenotypeHash(List evals, List rodNames) { + + HashMap hash = new HashMap(); + + // te Genotype is pool-wide so all samples are associated with it + if ( evals.size() > 0 ) { + for ( String name : rodNames ) + hash.put(name, evals.get(0)); + } + + return hash; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 1d3cba67e..d6fce2628 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -133,7 +133,10 @@ public class VariantEvalWalker extends RodWalker { analyses.add(new VariantCounter()); analyses.add(new VariantDBCoverage(knownSNPDBName)); analyses.add(new PooledFrequencyAnalysis(numPeopleInPool,knownSNPDBName)); - analyses.add(new GenotypeConcordance(genotypeChipName)); + if ( samplesFile == null ) + analyses.add(new GenotypeConcordance(genotypeChipName, false)); + else + analyses.add(new GenotypeConcordance(samplesFile, true)); analyses.add(new TransitionTranversionAnalysis()); analyses.add(new NeighborDistanceAnalysis()); analyses.add(new HardyWeinbergEquilibrium(badHWEThreshold));