Now includes statistics on the allele agreement with dbSNP -- counts concordant calls as dbSNP = A/C and we say A/C, vs. we say A/T

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1392 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-08-07 19:37:07 +00:00
parent 20baa80751
commit 6d3ef73868
4 changed files with 46 additions and 11 deletions

View File

@ -209,8 +209,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
@Override
public List<String> getGenotype() throws IllegalStateException {
// TODO Auto-generated method stub
return null;
return Arrays.asList(Utils.join("", getAllelesFWD()));
}
public double getMAF() {

View File

@ -113,7 +113,9 @@ public class rodVariants extends BasicReferenceOrderedDatum implements AllelicVa
}
public char getAltSnpFWD() throws IllegalStateException {
return (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
char c = (bestGenotype.charAt(0) == refBase) ? bestGenotype.charAt(1) : bestGenotype.charAt(0);
//System.out.printf("%s : %c and %c%n", bestGenotype, refBase, c);
return c;
}
public boolean isReference() {

View File

@ -22,23 +22,50 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
private int nDBObs = 0;
private int nEvalObs = 0;
private int nOverlapping = 0;
private int nConcordant = 0;
public VariantDBCoverage(final String name) {
super("db_coverage");
dbName = name;
}
public void inc(boolean inDB, boolean inEval) {
public void inc(AllelicVariant dbSNP, AllelicVariant eval) {
boolean inDB = dbSNP != null;
boolean inEval = eval != null;
if (inDB) nDBObs++;
if (inEval) nEvalObs++;
if (inDB && inEval) nOverlapping++;
if (inDB && inEval) {
nOverlapping++;
if ( discordantP(dbSNP, eval) )
nConcordant++;
}
}
public int nDBSites() { return nDBObs; }
public int nEvalSites() { return nEvalObs; }
public int nOverlappingSites() { return nOverlapping; }
public int nConcordant() { return nConcordant; }
public int nNovelSites() { return Math.abs(nEvalSites() - nOverlappingSites()); }
public boolean discordantP(AllelicVariant dbSNP, AllelicVariant eval) {
if (dbSNP != null && dbSNP.isSNP() && eval != null ) {
boolean concordant = dbSNP.getAltSnpFWD() == eval.getAltSnpFWD() || dbSNP.getRefSnpFWD() == eval.getAltSnpFWD();
//System.out.printf("dbSNP=%s | %c, eval=%s | %c, concordant=%b %s %s%n",
// dbSNP.getGenotype().get(0), dbSNP.getAltSnpFWD(),
// eval.getGenotype().get(0), eval.getAltSnpFWD(),
// concordant,
// dbSNP, eval);
return ! concordant;
} else {
return false;
}
}
/**
* What fraction of the evaluated site variants were also found in the db?
*
@ -48,11 +75,16 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
return nOverlappingSites() / (1.0 * nEvalSites());
}
public double concordanceRate() {
return nConcordant() / (1.0 * nOverlappingSites());
}
public String update(AllelicVariant eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) {
// There are four cases here:
AllelicVariant dbsnp = (AllelicVariant)tracker.lookup(dbName, null);
inc(dbsnp != null, eval != null);
return dbsnp == null && eval != null ? "Novel " + eval : null;
boolean isSNP = dbsnp != null && dbsnp.isSNP();
inc(isSNP ? dbsnp : null, eval);
return ! isSNP && eval != null ? "Novel " + eval : (discordantP(dbsnp, eval) ? (String.format("Discordant DBSNP=%s %s", dbsnp.getGenotype().get(0), eval)) : null);
}
/**
@ -71,8 +103,10 @@ public class VariantDBCoverage extends BasicVariantAnalysis implements GenotypeA
s.add(String.format("n_db_sites %d", nDBSites()));
s.add(String.format("n_eval_sites %d", nEvalSites()));
s.add(String.format("n_overlapping_sites %d", nOverlappingSites()));
s.add(String.format("n_concordant %d", nConcordant()));
s.add(String.format("n_novel_sites %d", nNovelSites()));
s.add(String.format("per_eval_sites_in_db %.2f", 100*fractionEvalSitesCoveredByDB()));
s.add(String.format("concordance_rate %.2f", 100*concordanceRate()));
s.add(String.format("per_db_sites_in_eval %.2f", 100*fractionDBSitesDiscoveredInEval()));
return s;
}

View File

@ -30,8 +30,8 @@ import java.util.*;
@Requires(DataSource.REFERENCE)
@Allows(DataSource.REFERENCE)
public class VariantEvalWalker extends RefWalker<Integer, Integer> {
@Argument(shortName="minDiscoveryQ", doc="Phred-scaled minimum LOD to consider an evaluation SNP a variant", required=false)
public int minDiscoveryQ = -1;
@Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false)
public int minConfidenceScore = -1;
@Argument(shortName="printVariants", doc="If true, prints the variants in all of the variant tracks that are examined", required=false)
public boolean printVariants = false;
@ -159,10 +159,10 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
if ( eval != null ) {
if ( evalContainsGenotypes ) {
// Genotyping - use best vs. next best lod
if ( eval.getConsensusConfidence() < minDiscoveryQ ) eval = null;
if ( eval.getConsensusConfidence() < minConfidenceScore ) eval = null;
} else {
// Variant discovery - use best vs. reference lod
if ( Math.abs(eval.getVariationConfidence()) < minDiscoveryQ ) eval = null;
if ( Math.abs(eval.getVariationConfidence()) < minConfidenceScore ) eval = null;
}
}