Tweaks to parameters for NQS analysis walkers; change to PowerAndCoverage for Jason Flannick (can now input the number of alleles to compute power for, e.g. doubletons or tripletons, rather than statically checking singletons).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1757 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-10-02 19:11:27 +00:00
parent 7249fade05
commit 68cb2ee54b
5 changed files with 27 additions and 19 deletions

View File

@ -20,9 +20,9 @@ import net.sf.samtools.SAMRecord;
* To change this template use File | Settings | File Templates.
*/
public class NQSClusteredZScoreWalker extends LocusWalker<LocalMapType, int[][][]> {
static final int WIN_SIDE_SIZE = 4;
static final int Z_SCORE_MAX = 4;
static final int Z_SCORE_MULTIPLIER = 3; // bins are Z_SCORE * (this) rounded to the nearest int
static final int WIN_SIDE_SIZE = 5;
static final int Z_SCORE_MAX = 7;
static final int Z_SCORE_MULTIPLIER = 30; // bins are Z_SCORE * (this) rounded to the nearest int
static final int MM_OFFSET = 1;
static final int MATCH_OFFSET = 0;
static final int MAX_Q_SCORE = 2 + QualityUtils.MAX_REASONABLE_Q_SCORE;
@ -129,8 +129,8 @@ public class NQSClusteredZScoreWalker extends LocusWalker<LocalMapType, int[][][
}
public String header() {
String format = "%s\t%s\t%s\t%s\t%s%n";
return String.format(format, "ZScore", "Expected_Mismatch", "Empirical_Mismatch", "Expected_MM_As_Q", "Empirical_MM_As_Q");
String format = "%s\t%s\t%s\t%s\t%s\t%s%n";
return String.format(format, "ZScore", "N_obs", "Expected_Mismatch", "Empirical_Mismatch", "Expected_MM_As_Q", "Empirical_MM_As_Q");
}
public String formatData ( int[][] matchMismatchQ, int zScoreBin ) {
@ -142,18 +142,18 @@ public class NQSClusteredZScoreWalker extends LocusWalker<LocalMapType, int[][][
for ( int i = 0; i < MAX_Q_SCORE; i ++ ) {
match += matchMismatchQ[i][MATCH_OFFSET];
mismatch += matchMismatchQ[i][MM_OFFSET];
expMMR += QualityUtils.qualToProb(i)*matchMismatchQ[i][MATCH_OFFSET];
expMMR += QualityUtils.qualToProb(i)*matchMismatchQ[i][MM_OFFSET];
expMMR += QualityUtils.qualToErrorProb((byte)i)*matchMismatchQ[i][MATCH_OFFSET];
expMMR += QualityUtils.qualToErrorProb((byte)i)*matchMismatchQ[i][MM_OFFSET];
}
expMMR = (expMMR / ( match + mismatch ));
double empMMR = ((double) mismatch)/(match + mismatch);
int expMMRAsQ = QualityUtils.probToQual(expMMR);
int empMMRAsQ = QualityUtils.probToQual(empMMR);
int expMMRAsQ = QualityUtils.probToQual(1-expMMR);
int empMMRAsQ = QualityUtils.probToQual(1-empMMR);
String format = "%f\t%f\t%f\t%d\t%d%n";
String format = "%f\t%d\t%f\t%f\t%d\t%d%n";
return String.format(format, zScore, expMMR, empMMR, expMMRAsQ, empMMRAsQ);
return String.format(format, zScore, match+mismatch, expMMR, empMMR, expMMRAsQ, empMMRAsQ);
}
}

View File

@ -126,15 +126,15 @@ public class NQSCovariantByCountsWalker extends LocusWalker< LocalMapType, int[]
for ( int i = 0; i < MAX_Q_SCORE; i ++ ) {
match += qScores[i][MATCH_OFFSET];
mismatch += qScores[i][MM_OFFSET];
expErr += QualityUtils.qualToProb(i)*qScores[i][MATCH_OFFSET];
expErr += QualityUtils.qualToProb(i)*qScores[i][MM_OFFSET];
expErr += QualityUtils.qualToErrorProb((byte)i)*qScores[i][MATCH_OFFSET];
expErr += QualityUtils.qualToErrorProb((byte)i)*qScores[i][MM_OFFSET];
}
expErr = expErr/(match + mismatch);
int expAsQ = QualityUtils.probToQual(expErr);
int expAsQ = QualityUtils.probToQual(1-expErr);
double empErr = ((double)mismatch)/(match+mismatch);
int empAsQ = QualityUtils.probToQual(empErr);
int empAsQ = QualityUtils.probToQual(1-empErr);
return String.format(DATA_FORMAT, smDev, lgDev, match, mismatch, expErr, empErr, expAsQ, empAsQ);
}

View File

@ -26,7 +26,7 @@ public class NQSExtendedGroupsCovariantWalker extends LocusWalker<LocalMapType,
final int MAX_QSCORE = QualityUtils.MAX_REASONABLE_Q_SCORE+1;
int NQS_GROUPS;
public static final String DATA_FORMAT = "%d\t%d\t%d\t%d\t%f%n";
public static final String DATA_FORMAT = "%d\t%d\t%d\t%d\t%d%n";
public static final String TEXT_FORMAT = "%s\t%s\t%s\t%s\t%s%n";
@ -143,7 +143,7 @@ public class NQSExtendedGroupsCovariantWalker extends LocusWalker<LocalMapType,
}
public String createHeader() {
return String.format(TEXT_FORMAT, "Qscore_Reported", "NQS_Group", "Mismatches", "Total", "Mismatch_Rate");
return String.format(TEXT_FORMAT, "Qscore_Reported", "NQS_Group", "Mismatches", "Total", "Empirical_Qscore");
}
public String formatNQSMismatchCountString(int qscore, int group, long[][][] cumulativeBins) {
@ -151,6 +151,6 @@ public class NQSExtendedGroupsCovariantWalker extends LocusWalker<LocalMapType,
long ct = cumulativeBins[group][COUNT_OFFSET][qscore];
double mmr = (ct > 0) ? mm/ct : -1;
return String.format(DATA_FORMAT, qscore, group, mm, ct, mmr);
return String.format(DATA_FORMAT, qscore, group, mm, ct, QualityUtils.probToQual(1-mmr));
}
}

View File

@ -87,6 +87,7 @@ public class NQSTabularDistributionWalker extends LocusWalker<LocalMapType, NQSD
class NQSDistributionTable {
public final int MM_OFFSET = 0;
public final int MATCH_OFFSET = 1;

View File

@ -51,12 +51,19 @@ public class PowerAndCoverageWalker extends LocusWalker<SQuad<Integer>, SQuad<Lo
@Argument(fullName="poolSize", shortName="ps", doc="Number of individuals in the pool", required = true)
public int numIndividuals = 0;
@Argument(fullName="alleleFrequency", shortName="af", doc="Calculate power for this many alleles in the pool", required=false)
public int alleleFreq = 1;
protected PrintStream outputWriter = null;
public void initialize() {
if(numIndividuals <= 0) {
throw new StingException("Pool size must be greater than 1. You input "+numIndividuals);
}
if ( alleleFreq > 2*numIndividuals ) {
throw new StingException("Allele frequency must be less than the number of alleles in the pool");
}
if( outputFile != null ) {
try {
outputWriter = new PrintStream(outputFile);
@ -255,7 +262,7 @@ public class PowerAndCoverageWalker extends LocusWalker<SQuad<Integer>, SQuad<Lo
}
private double getSNPProportion() {
return 1/(2.0*numIndividuals);
return alleleFreq/(2.0*numIndividuals);
}
}