Now includes statistics on the allele agreement with dbSNP -- counts concordant calls as dbSNP = A/C and we say A/C, vs. we say A/T

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1392 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-08-07 19:37:07 +00:00
parent 20baa80751
commit 6d3ef73868
4 changed files with 46 additions and 11 deletions

View File

@ -209,8 +209,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
@Override @Override
public List<String> getGenotype() throws IllegalStateException { public List<String> getGenotype() throws IllegalStateException {
// TODO Auto-generated method stub return Arrays.asList(Utils.join("", getAllelesFWD()));
return null;
} }
public double getMAF() { public double getMAF() {

View File

@ -113,7 +113,9 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
} }
public char getAltSnpFWD() throws IllegalStateException { public char getAltSnpFWD() throws IllegalStateException {
return (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0); char c = (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
//System.out.printf("%s : %c and %c%n", bestGenotype, refBase, c);
return c;
} }
public boolean isReference() { public boolean isReference() {

View File

@ -22,23 +22,50 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
private int nDBObs = 0; private int nDBObs = 0;
private int nEvalObs = 0; private int nEvalObs = 0;
private int nOverlapping = 0; private int nOverlapping = 0;
private int nConcordant = 0;
public VariantDBCoverage(final String name) { public VariantDBCoverage(final String name) {
super("db_coverage"); super("db_coverage");
dbName = name; dbName = name;
} }
public void inc(boolean inDB, boolean inEval) { public void inc(AllelicVariant dbSNP, AllelicVariant eval) {
boolean inDB = dbSNP != null;
boolean inEval = eval != null;
if (inDB) nDBObs++; if (inDB) nDBObs++;
if (inEval) nEvalObs++; if (inEval) nEvalObs++;
if (inDB && inEval) nOverlapping++; if (inDB && inEval) {
nOverlapping++;
if ( discordantP(dbSNP, eval) )
nConcordant++;
}
} }
public int nDBSites() { return nDBObs; } public int nDBSites() { return nDBObs; }
public int nEvalSites() { return nEvalObs; } public int nEvalSites() { return nEvalObs; }
public int nOverlappingSites() { return nOverlapping; } public int nOverlappingSites() { return nOverlapping; }
public int nConcordant() { return nConcordant; }
public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); } public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); }
public boolean discordantP(AllelicVariant dbSNP, AllelicVariant eval) {
if (dbSNP != null && dbSNP.isSNP() && eval != null ) {
boolean concordant = dbSNP.getAltSnpFWD() == eval.getAltSnpFWD() || dbSNP.getRefSnpFWD() == eval.getAltSnpFWD();
//System.out.printf("dbSNP=%s | %c, eval=%s | %c, concordant=%b %s %s%n",
// dbSNP.getGenotype().get(0), dbSNP.getAltSnpFWD(),
// eval.getGenotype().get(0), eval.getAltSnpFWD(),
// concordant,
// dbSNP, eval);
return ! concordant;
} else {
return false;
}
}
/** /**
* What fraction of the evaluated site variants were also found in the db? * What fraction of the evaluated site variants were also found in the db?
* *
@ -48,11 +75,16 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
return nOverlappingSites() / (1.0 * nEvalSites()); return nOverlappingSites() / (1.0 * nEvalSites());
} }
public double concordanceRate() {
return nConcordant() / (1.0 * nOverlappingSites());
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
// There are four cases here: // There are four cases here:
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null); AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null);
inc(dbsnp != null, eval != null); boolean isSNP = dbsnp != null && dbsnp.isSNP();
return dbsnp == null && eval != null ? "Novel " + eval : null; inc(isSNP ? dbsnp : null, eval);
return ! isSNP && eval != null ? "Novel " + eval : (discordantP(dbsnp, eval) ? (String.format("Discordant DBSNP=%s %s", dbsnp.getGenotype().get(0), eval)) : null);
} }
/** /**
@ -71,8 +103,10 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
s.add(String.format("n_db_sites %d", nDBSites())); s.add(String.format("n_db_sites %d", nDBSites()));
s.add(String.format("n_eval_sites %d", nEvalSites())); s.add(String.format("n_eval_sites %d", nEvalSites()));
s.add(String.format("n_overlapping_sites %d", nOverlappingSites())); s.add(String.format("n_overlapping_sites %d", nOverlappingSites()));
s.add(String.format("n_concordant %d", nConcordant()));
s.add(String.format("n_novel_sites %d", nNovelSites())); s.add(String.format("n_novel_sites %d", nNovelSites()));
s.add(String.format("per_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB())); s.add(String.format("per_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB()));
s.add(String.format("concordance_rate %.2f", 100*concordanceRate()));
s.add(String.format("per_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval())); s.add(String.format("per_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval()));
return s; return s;
} }

View File

@ -30,8 +30,8 @@ import java.util.*;
@Requires(DataSource.REFERENCE) @Requires(DataSource.REFERENCE)
@Allows(DataSource.REFERENCE) @Allows(DataSource.REFERENCE)
public class VariantEvalWalker extends RefWalker<Integer, Integer> { public class VariantEvalWalker extends RefWalker<Integer, Integer> {
@Argument(shortName="minDiscoveryQ", doc="Phred-scaled minimum LOD to consider an evaluation SNP a variant", required=false) @Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false)
public int minDiscoveryQ = -1; public int minConfidenceScore = -1;
@Argument(shortName="printVariants", doc="If true, prints the variants in all of the variant tracks that are examined", required=false) @Argument(shortName="printVariants", doc="If true, prints the variants in all of the variant tracks that are examined", required=false)
public boolean printVariants = false; public boolean printVariants = false;
@ -159,10 +159,10 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
if ( eval != null ) { if ( eval != null ) {
if ( evalContainsGenotypes ) { if ( evalContainsGenotypes ) {
// Genotyping - use best vs. next best lod // Genotyping - use best vs. next best lod
if ( eval.getConsensusConfidence() < minDiscoveryQ ) eval = null; if ( eval.getConsensusConfidence() < minConfidenceScore ) eval = null;
} else { } else {
// Variant discovery - use best vs. reference lod // Variant discovery - use best vs. reference lod
if ( Math.abs(eval.getVariationConfidence()) < minDiscoveryQ ) eval = null; if ( Math.abs(eval.getVariationConfidence()) < minConfidenceScore ) eval = null;
} }
} }