Generate chip concordance table.

This should work, although I need to test it with some real GLFs


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1265 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-07-16 17:44:47 +00:00
parent 88ffb08af4
commit 8bc0832215
1 changed files with 58 additions and 43 deletions

View File

@ -1,12 +1,10 @@
package org.broadinstitute.sting.playground.gatk.walkers.varianteval; package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant; import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.StingException;
import java.io.PrintStream;
import java.util.List; import java.util.List;
import java.util.Arrays;
import java.util.ArrayList; import java.util.ArrayList;
/** /**
@ -21,61 +19,78 @@ import java.util.ArrayList;
*/ */
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis { public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
private String dbName; private String dbName;
private int nDBObs = 0;
private int nEvalObs = 0; private static final int TRUTH_REF = 0;
private int nOverlapping = 0; private static final int TRUTH_VAR_HET = 1;
private static final int TRUTH_VAR_HOM = 2;
private static final int TRUTH_UNKNOWN = 3;
private static final int CALL_REF = 0;
private static final int CALL_VAR_HET = 1;
private static final int CALL_VAR_HOM = 2;
private static final int CALL_NO_CONF = 3;
private static final int UNCALLABLE = 4;
private int[][] table = new int[4][5];
public GenotypeConcordance(final String name) { public GenotypeConcordance(final String name) {
super("genotype_concordance"); super("genotype_concordance");
dbName = name; dbName = name;
} }
public void inc(boolean inDB, boolean inEval) { public void inc(AllelicVariant chip, AllelicVariant eval) {
if (inDB) nDBObs++; if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) )
if (inEval) nEvalObs++; throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
if (inDB && inEval) nOverlapping++;
}
public int nDBSites() { return nDBObs; } int truthIndex, callIndex;
public int nEvalSites() { return nEvalObs; } if ( chip == null )
public int nOverlappingSites() { return nOverlapping; } truthIndex = TRUTH_UNKNOWN;
public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); } else if ( chip.isReference() )
truthIndex = TRUTH_REF;
else if ( isHet(chip) )
truthIndex = TRUTH_VAR_HET;
else
truthIndex = TRUTH_VAR_HOM;
/** if ( eval == null )
* What fraction of the evaluated site variants were also found in the db? callIndex = UNCALLABLE;
* else if ( eval.getVariationConfidence() < 5.0 )
* @return callIndex = CALL_NO_CONF;
*/ else if ( eval.isReference() )
public double fractionEvalSitesCoveredByDB() { callIndex = CALL_REF;
return nOverlappingSites() / (1.0 * nEvalSites()); else if ( isHet(eval) )
callIndex = CALL_VAR_HET;
else
callIndex = CALL_VAR_HOM;
table[truthIndex][callIndex]++;
} }
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) { public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
// There are four cases here: AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null);
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null); inc(chip, eval);
inc(dbsnp != null, eval != null); return chip == null && eval != null ? "Novel " + eval : null;
return dbsnp == null && eval != null ? "Novel " + eval : null;
}
/**
* What fraction of the DB sites were discovered in the evaluation calls?
*
* @return
*/
public double fractionDBSitesDiscoveredInEval() {
return nOverlappingSites() / (1.0 * nDBSites());
} }
public List<String> done() { public List<String> done() {
List<String> s = new ArrayList<String>(); List<String> s = new ArrayList<String>();
s.add(String.format("%d\t%d\t%d\t%.2f\t%.2f", nDBSites(), nEvalSites(), nOverlappingSites(), fractionEvalSitesCoveredByDB(), fractionDBSitesDiscoveredInEval()));
s.add(String.format("name %s", dbName)); s.add(String.format("name %s", dbName));
s.add(String.format("n_db_sites %d", nDBSites())); s.add(String.format("\t\tCALLED REF\tCALLED VAR_HET\tCALLED_VAR_HOM\tNO CONF\tUNCALLABLE"));
s.add(String.format("n_eval_sites %d", nEvalSites())); s.add(String.format("IS REF\t%d\t%d\t%d\t%d\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE]));
s.add(String.format("n_overlapping_sites %d", nOverlappingSites())); s.add(String.format("IS VAR_HET\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE]));
s.add(String.format("n_novel_sites %d", nNovelSites())); s.add(String.format("IS VAR_HOM\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE]));
s.add(String.format("per_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB())); s.add(String.format("UNKNOWN\t%d\t%d\t%d\t%d\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE]));
s.add(String.format("per_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval()));
return s; return s;
} }
/**
 * Reports whether a genotype call is heterozygous.
 *
 * If the variant is a Genotype, its own isHet() answer is returned.
 * Otherwise the first two entries of getGenotype() are compared by value;
 * a genotype list with fewer than two entries is reported as not heterozygous.
 *
 * @param var the variant to inspect; callers in this class pass non-null values
 * @return true if the first two alleles differ (or Genotype.isHet() says so)
 */
private static boolean isHet(AllelicVariant var) {
    if ( var instanceof Genotype )
        return ((Genotype)var).isHet();

    List<String> genotype = var.getGenotype();
    if ( genotype.size() < 2 )
        return false;

    String first = genotype.get(0);
    String second = genotype.get(1);

    // Keep the original result for null entries: two nulls are "the same allele",
    // one null and one non-null are different.
    if ( first == null )
        return second != null;

    // Compare by value. '!=' on Strings tests object identity, so two equal
    // allele strings held in distinct objects would be miscounted as a het call.
    return !first.equals(second);
}
} }