Generate chip concordance table.
This should work, although I need to test it with some real GLFs git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1265 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
88ffb08af4
commit
8bc0832215
|
|
@ -1,12 +1,10 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
package org.broadinstitute.sting.playground.gatk.walkers.varianteval;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -21,61 +19,78 @@ import java.util.ArrayList;
|
||||||
*/
|
*/
|
||||||
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||||
private String dbName;
|
private String dbName;
|
||||||
private int nDBObs = 0;
|
|
||||||
private int nEvalObs = 0;
|
private static final int TRUTH_REF = 0;
|
||||||
private int nOverlapping = 0;
|
private static final int TRUTH_VAR_HET = 1;
|
||||||
|
private static final int TRUTH_VAR_HOM = 2;
|
||||||
|
private static final int TRUTH_UNKNOWN = 3;
|
||||||
|
|
||||||
|
private static final int CALL_REF = 0;
|
||||||
|
private static final int CALL_VAR_HET = 1;
|
||||||
|
private static final int CALL_VAR_HOM = 2;
|
||||||
|
private static final int CALL_NO_CONF = 3;
|
||||||
|
private static final int UNCALLABLE = 4;
|
||||||
|
|
||||||
|
private int[][] table = new int[4][5];
|
||||||
|
|
||||||
public GenotypeConcordance(final String name) {
|
public GenotypeConcordance(final String name) {
|
||||||
super("genotype_concordance");
|
super("genotype_concordance");
|
||||||
dbName = name;
|
dbName = name;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void inc(boolean inDB, boolean inEval) {
|
public void inc(AllelicVariant chip, AllelicVariant eval) {
|
||||||
if (inDB) nDBObs++;
|
if ( (chip != null && !chip.isGenotype()) || (eval != null && !eval.isGenotype()) )
|
||||||
if (inEval) nEvalObs++;
|
throw new StingException("Failure: trying to analyze genotypes of non-genotype data");
|
||||||
if (inDB && inEval) nOverlapping++;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int nDBSites() { return nDBObs; }
|
int truthIndex, callIndex;
|
||||||
public int nEvalSites() { return nEvalObs; }
|
if ( chip == null )
|
||||||
public int nOverlappingSites() { return nOverlapping; }
|
truthIndex = TRUTH_UNKNOWN;
|
||||||
public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); }
|
else if ( chip.isReference() )
|
||||||
|
truthIndex = TRUTH_REF;
|
||||||
|
else if ( isHet(chip) )
|
||||||
|
truthIndex = TRUTH_VAR_HET;
|
||||||
|
else
|
||||||
|
truthIndex = TRUTH_VAR_HOM;
|
||||||
|
|
||||||
/**
|
if ( eval == null )
|
||||||
* What fraction of the evaluated site variants were also found in the db?
|
callIndex = UNCALLABLE;
|
||||||
*
|
else if ( eval.getVariationConfidence() < 5.0 )
|
||||||
* @return
|
callIndex = CALL_NO_CONF;
|
||||||
*/
|
else if ( eval.isReference() )
|
||||||
public double fractionEvalSitesCoveredByDB() {
|
callIndex = CALL_REF;
|
||||||
return nOverlappingSites() / (1.0 * nEvalSites());
|
else if ( isHet(eval) )
|
||||||
|
callIndex = CALL_VAR_HET;
|
||||||
|
else
|
||||||
|
callIndex = CALL_VAR_HOM;
|
||||||
|
|
||||||
|
table[truthIndex][callIndex]++;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
// There are four cases here:
|
AllelicVariant chip = (AllelicVariant)tracker.lookup(dbName, null);
|
||||||
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null);
|
inc(chip, eval);
|
||||||
inc(dbsnp != null, eval != null);
|
return chip == null && eval != null ? "Novel " + eval : null;
|
||||||
return dbsnp == null && eval != null ? "Novel " + eval : null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* What fraction of the DB sites were discovered in the evalution calls?
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public double fractionDBSitesDiscoveredInEval() {
|
|
||||||
return nOverlappingSites() / (1.0 * nDBSites());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> done() {
|
public List<String> done() {
|
||||||
List<String> s = new ArrayList<String>();
|
List<String> s = new ArrayList<String>();
|
||||||
s.add(String.format("%d\t%d\t%d\t%.2f\t%.2f", nDBSites(), nEvalSites(), nOverlappingSites(), fractionEvalSitesCoveredByDB(), fractionDBSitesDiscoveredInEval()));
|
|
||||||
s.add(String.format("name %s", dbName));
|
s.add(String.format("name %s", dbName));
|
||||||
s.add(String.format("n_db_sites %d", nDBSites()));
|
s.add(String.format("\t\tCALLED REF\tCALLED VAR_HET\tCALLED_VAR_HOM\tNO CONF\tUNCALLABLE"));
|
||||||
s.add(String.format("n_eval_sites %d", nEvalSites()));
|
s.add(String.format("IS REF\t%d\t%d\t%d\t%d\t%d", table[TRUTH_REF][CALL_REF], table[TRUTH_REF][CALL_VAR_HET], table[TRUTH_REF][CALL_VAR_HOM], table[TRUTH_REF][CALL_NO_CONF], table[TRUTH_REF][UNCALLABLE]));
|
||||||
s.add(String.format("n_overlapping_sites %d", nOverlappingSites()));
|
s.add(String.format("IS VAR_HET\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HET][CALL_REF], table[TRUTH_VAR_HET][CALL_VAR_HET], table[TRUTH_VAR_HET][CALL_VAR_HOM], table[TRUTH_VAR_HET][CALL_NO_CONF], table[TRUTH_VAR_HET][UNCALLABLE]));
|
||||||
s.add(String.format("n_novel_sites %d", nNovelSites()));
|
s.add(String.format("IS VAR_HOM\t%d\t%d\t%d\t%d\t%d", table[TRUTH_VAR_HOM][CALL_REF], table[TRUTH_VAR_HOM][CALL_VAR_HET], table[TRUTH_VAR_HOM][CALL_VAR_HOM], table[TRUTH_VAR_HOM][CALL_NO_CONF], table[TRUTH_VAR_HOM][UNCALLABLE]));
|
||||||
s.add(String.format("per_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB()));
|
s.add(String.format("UNKNOWN\t%d\t%d\t%d\t%d\t%d", table[TRUTH_UNKNOWN][CALL_REF], table[TRUTH_UNKNOWN][CALL_VAR_HET], table[TRUTH_UNKNOWN][CALL_VAR_HOM], table[TRUTH_UNKNOWN][CALL_NO_CONF], table[TRUTH_UNKNOWN][UNCALLABLE]));
|
||||||
s.add(String.format("per_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval()));
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean isHet(AllelicVariant var) {
|
||||||
|
if ( var instanceof Genotype )
|
||||||
|
return ((Genotype)var).isHet();
|
||||||
|
|
||||||
|
List<String> genotype = var.getGenotype();
|
||||||
|
if ( genotype.size() < 2 )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return (genotype.get(0) != genotype.get(1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue