Major code refactoring:

@ Pooled utils & power
   - Removed two of the power walkers leaving only PowerBelowFrequency, added some additional
     flags on PowerBelowFrequency to give it some of the behavior that PowerAndCoverage had
   - Removed a number of PoolUtils variables and methods that were used in those walkers or simply
     not used
   - Removed AnalyzePowerWalker (unnecessary)
   - Changed the location of Quad/Squad/ReadOffsetQuad into poolseq

@NQS
   - Deleted all walkers but the minimum NQS walker, refactored not to use LocalMapType

@ BaseTransitionTable
   - Added a slew of new integration tests for different flaggable and integral parameters
   - (Scala) just a System.out that was added and commented out (no actual code change)
   - (Java) changed a < to <= and a boolean formula


Chris



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1887 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-10-20 14:58:04 +00:00
parent 4be6bb8e92
commit 3b1fabeff0
21 changed files with 232 additions and 1992 deletions

View File

@ -201,7 +201,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Referen
public int countMismatches(ReadBackedPileup p) {
int refM = 0;
switch (p.getRef()) {
switch (Character.toUpperCase(p.getRef())) {
case 'A': refM = BasicPileup.countBases(p.getBases()).a;
break;
case 'C': refM = BasicPileup.countBases(p.getBases()).c;
@ -322,6 +322,7 @@ class BaseTransitionTable {
}
class ReferenceContextWindow {
protected int windowSize;

View File

@ -1,34 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
/**
 * Simple container bundling the three per-locus objects handed to a
 * LocusWalker's map() (alignment context, reference context, reference
 * metadata tracker), plus a couple of convenience accessors over the pileup.
 */
class LocalMapType {
    public AlignmentContext context;
    public ReferenceContext ref;
    public RefMetaDataTracker tracker;

    public LocalMapType(AlignmentContext alignmentContext, ReferenceContext referenceContext, RefMetaDataTracker metaDataTracker) {
        context = alignmentContext;
        ref = referenceContext;
        tracker = metaDataTracker;
    }

    /** @return the number of reads overlapping this locus */
    public int numReads() {
        return context.numReads();
    }

    /** @return the base quality, as an int, of read number {@code read} at the base overlapping this locus */
    public int qScore(int read) {
        int offsetInRead = context.getOffsets().get(read);
        byte[] readQuals = context.getReads().get(read).getBaseQualities();
        return (int) readQuals[offsetInRead];
    }
}

View File

@ -0,0 +1,142 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.List;
import java.util.ArrayList;
import net.sf.samtools.SAMRecord;
/**
 * Tabulates, for every (reported quality, minimum neighborhood quality) pair,
 * how many pileup bases matched or mismatched the reference at non-dbSNP loci,
 * and reports the empirical mismatch rate and quality for each bin.
 */
@Requires(value={DataSource.REFERENCE}, referenceMetaData = {@RMD(name="dbsnp",type= rodDbSNP.class)})
public class MinimumNQSWalker extends LocusWalker<Pair<List<Pair<Integer,Integer>>,List<Pair<Integer,Integer>>>, int[][][]> {
    // Offsets into the last dimension of the counts table.
    private static int MM_OFFSET = 1;
    private static int MATCH_OFFSET = 0;
    // Number of quality bins: scores 0 .. MAX_REASONABLE_Q_SCORE inclusive.
    private static int QSCORE_MAX = 1 + QualityUtils.MAX_REASONABLE_Q_SCORE;

    @Argument(fullName="windowSize", shortName="ws", doc="Size of the window (in one direction)", required=true)
    private int winSide = 4;

    public void initialize() {
        out.printf("%s%n", makeHeader());
    }

    /** Allocates the zeroed [reported Q][neighborhood Q][match|mismatch] table. */
    public int[][][] reduceInit() {
        int[][][] counts = new int[QSCORE_MAX][QSCORE_MAX][2];
        for ( int i = 0; i < QSCORE_MAX; i ++ ) {
            for ( int j = 0; j < QSCORE_MAX; j ++ ) {
                counts[i][j][MM_OFFSET] = 0;
                counts[i][j][MATCH_OFFSET] = 0;
            }
        }
        return counts;
    }

    /** Folds one locus worth of (quality, NQS) pairs into the running table. */
    public int[][][] reduce(Pair<List<Pair<Integer,Integer>>,List<Pair<Integer,Integer>>> map, int[][][] prevReduce) {
        if ( map != null ) { // null means the locus was a dbSNP site and was skipped
            List<Pair<Integer,Integer>> matchingQualityNQSPairs = map.getFirst();
            List<Pair<Integer,Integer>> mismatchingQualityNQSPairs = map.getSecond();
            for ( Pair<Integer,Integer> p : matchingQualityNQSPairs ) {
                prevReduce[p.getFirst()][p.getSecond()][MATCH_OFFSET] ++;
            }
            for ( Pair<Integer,Integer> p : mismatchingQualityNQSPairs ) {
                prevReduce[p.getFirst()][p.getSecond()][MM_OFFSET] ++;
            }
        }
        return prevReduce;
    }

    /**
     * @return null at dbSNP sites; otherwise a pair of (reported quality, minimum
     *         neighborhood quality) lists -- the first for bases matching the
     *         reference, the second for mismatching bases.
     */
    public Pair<List<Pair<Integer,Integer>>,List<Pair<Integer,Integer>>> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        ArrayList<Pair<Integer,Integer>> matchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
        ArrayList<Pair<Integer,Integer>> mismatchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
        if ( (Variation) tracker.lookup("dbsnp",null) == null ) {
            for ( int r = 0; r < context.numReads(); r ++ ) {
                SAMRecord read = context.getReads().get(r);
                int offset = context.getOffsets().get(r);
                // Clamp defensively: a reported quality above MAX_REASONABLE_Q_SCORE
                // would otherwise index past the counts table in reduce().
                int quality = Math.min((int) read.getBaseQualities()[offset], QSCORE_MAX - 1);
                int NQS = getNghdQ(offset, read);
                Pair<Integer,Integer> qualityNQSPair = new Pair<Integer,Integer> (quality,NQS);
                if ( BaseUtils.basesAreEqual(read.getReadBases()[offset], (byte) ref.getBase()) ) {
                    matchingQualityNQSPairs.add(qualityNQSPair);
                } else {
                    mismatchingQualityNQSPairs.add(qualityNQSPair);
                }
            }
            return new Pair<List<Pair<Integer,Integer>>,List<Pair<Integer,Integer>>>(matchingQualityNQSPairs,mismatchingQualityNQSPairs);
        } else {
            return null;
        }
    }

    public void onTraversalDone( int[][][] reduce ) {
        for ( int qc = 0; qc < QSCORE_MAX; qc ++ ) {
            for ( int qn = 0; qn < QSCORE_MAX; qn ++ ) {
                out.printf("%s%n", formatData(reduce[qc][qn],qc,qn));
            }
        }
    }

    /**
     * Minimum base quality among the bases within winSide of the given offset,
     * excluding the offset itself, clamped into the counts-table range.
     */
    public int getNghdQ(int off, SAMRecord read) {
        byte minQ = Byte.MAX_VALUE;
        byte[] quals = read.getBaseQualities();
        int rdlnth = read.getReadLength();
        int start;
        int end; // exclusive
        if ( off - winSide < 0 ) {
            start = 0;
        } else {
            start = off - winSide;
        }
        if ( off + winSide > rdlnth ) {
            end = rdlnth;
        } else {
            end = off + winSide;
        }
        for ( int i = start; i < end; i ++ ) {
            if ( i != off ) {
                byte q = quals[i];
                if ( q < minQ ) {
                    minQ = q;
                }
            }
        }
        // BUGFIX: with no neighbors in the window (e.g. a length-1 read) minQ
        // stayed at Byte.MAX_VALUE (127), indexing past the QSCORE_MAX-wide
        // table in reduce(); clamp into the valid bin range instead.
        return Math.min((int) minQ, QSCORE_MAX - 1);
    }

    private String makeHeader() {
        return String.format("%s\t%s\t%s\t%s\t%s\t%s","Reported_Q","Min_Nghd_Q","N_observations","Mm_rate","Empirical_Q","Q_diff");
    }

    /** Formats one (reported Q, neighborhood Q) row of the output table. */
    private String formatData( int[] mmArray, int qCenter, int qNghd ) {
        int counts = mmArray[MM_OFFSET]+mmArray[MATCH_OFFSET];
        // BUGFIX: empty bins used to compute 0/0 = NaN; report a zero rate instead.
        double mismatch = (counts > 0) ? ((double)mmArray[MM_OFFSET]/counts) : 0.0;
        byte qEmp = QualityUtils.probToQual(1-mismatch);
        return String.format("%d\t%d\t%d\t%f\t%d\t%d", qCenter, qNghd, counts, mismatch, qEmp,qEmp-qCenter);
    }
}

View File

@ -1,155 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.playground.gatk.walkers.Recalibration.LocalMapType;
import net.sf.samtools.SAMRecord;
/**
 * Bins each pileup base by the z-score of its quality relative to the mean and
 * variance of the qualities in its +/- WIN_SIDE_SIZE neighborhood (excluding
 * the base itself), and tabulates match/mismatch counts per
 * (z bin, reported quality) cell.
 */
public class NQSClusteredZScoreWalker extends LocusWalker<LocalMapType, int[][][]> {
    static final int WIN_SIDE_SIZE = 5;          // neighborhood half-width
    static final int Z_SCORE_MIN = -6;           // z-scores below this saturate into bin 0
    static final int Z_SCORE_MAX = 6;            // z-scores at or above this saturate into the top bin
    static final int Z_SCORE_MULTIPLIER = 20;    // bins are Z_SCORE * (this) rounded to the nearest int
    static final int MM_OFFSET = 1;
    static final int MATCH_OFFSET = 0;
    static final int MAX_Q_SCORE = 2 + QualityUtils.MAX_REASONABLE_Q_SCORE;
    protected int WINDOW;
    protected int Z_SCORE_RANGE;

    public void initialize() {
        WINDOW = 2*WIN_SIDE_SIZE+1;
        Z_SCORE_RANGE = Z_SCORE_MAX - Z_SCORE_MIN;
    }

    /** Allocates the zeroed [zBin][quality][match|mismatch] table (top bin included). */
    public int[][][] reduceInit() {
        int[][][] q = new int[Z_SCORE_RANGE*Z_SCORE_MULTIPLIER+1][MAX_Q_SCORE][2];
        for ( int i = 0; i < Z_SCORE_RANGE*Z_SCORE_MULTIPLIER+1; i ++ ) {
            for ( int j = 0; j < MAX_Q_SCORE; j ++ ) {
                for ( int k = 0; k < 2; k ++ ) {
                    q[i][j][k] = 0;
                }
            }
        }
        return q;
    }

    public LocalMapType map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
        return new LocalMapType(context, ref, tracker);
    }

    /** Adds every base of the pileup into its (z bin, quality) cell. */
    public int[][][] reduce(LocalMapType map, int[][][] qScores) {
        for ( int i = 0; i < map.numReads(); i ++ ) {
            SAMRecord read = map.context.getReads().get(i);
            int offset = map.context.getOffsets().get(i);
            if ( isMismatch(read, offset, map.ref) ) {
                qScores[calcZScoreBin(read,offset)][getQScoreAsInt(read,offset)][MM_OFFSET]++;
            } else {
                qScores[calcZScoreBin(read,offset)][getQScoreAsInt(read,offset)][MATCH_OFFSET]++;
            }
        }
        return qScores;
    }

    public void onTraversalDone( int[][][] zScoreBins ) {
        out.print( header() );
        // BUGFIX: the table holds Z_SCORE_RANGE*Z_SCORE_MULTIPLIER + 1 bins; the
        // loop previously stopped one short and dropped the saturated top bin.
        for ( int i = 0; i <= Z_SCORE_RANGE*Z_SCORE_MULTIPLIER; i ++ ) {
            for ( int j = 0; j < MAX_Q_SCORE; j ++ ) {
                out.print( formatData(zScoreBins[i][j], i, j) );
            }
        }
    }

    public boolean isMismatch( SAMRecord read, int offset, ReferenceContext ref ) {
        return (Character.toUpperCase((char) read.getReadBases()[offset]) != ref.getBase());
    }

    public int getQScoreAsInt( SAMRecord read, int offset ) {
        return (int) read.getBaseQualities()[offset];
    }

    /** Maps a base's quality z-score into a bin index 0 .. RANGE*MULTIPLIER (saturating). */
    public int calcZScoreBin( SAMRecord read, int offset ) {
        Pair<Double,Double> meanVar = calcWindowMeanVariance(read, offset);
        double sd = Math.sqrt(meanVar.second);
        // BUGFIX: a zero-variance (or neighborless) window used to divide by zero,
        // producing NaN/Inf z-scores; treat it as z == 0 instead.
        double rawZ = (sd > 0) ? (getQScoreAsInt(read, offset) - meanVar.first)/sd : 0.0;
        int zBin;
        if ( rawZ >= Z_SCORE_MAX ) {
            zBin = Z_SCORE_RANGE*Z_SCORE_MULTIPLIER; // saturate into the top bin
        } else if ( rawZ <= Z_SCORE_MIN ) {
            zBin = 0;                                // saturate into the bottom bin
        } else if ( rawZ > 0 ) {
            zBin = (int) Math.floor(rawZ*Z_SCORE_MULTIPLIER) - (int) Math.floor(Z_SCORE_MIN*Z_SCORE_MULTIPLIER);
        } else {
            zBin = (int) Math.floor(-Z_SCORE_MIN*Z_SCORE_MULTIPLIER) + (int) Math.floor(rawZ*Z_SCORE_MULTIPLIER);
        }
        return zBin;
    }

    /** Mean and variance of the neighborhood qualities, excluding the center base. */
    public Pair<Double,Double> calcWindowMeanVariance( SAMRecord read, int offset ) {
        int start,end;
        if ( offset - WIN_SIDE_SIZE < 0 ) {
            start = 0;
        } else {
            start = offset - WIN_SIDE_SIZE;
        }
        if ( offset + WIN_SIDE_SIZE > read.getReadLength() ) {
            end = read.getReadLength();
        } else {
            end = offset + WIN_SIDE_SIZE;
        }
        byte[] quals = read.getBaseQualities();
        // BUGFIX: the center base is excluded from the sums, so the divisor must be
        // the neighbor count (end - start - 1), not the full window width.
        int n = end - start - 1;
        if ( n < 1 ) {
            // No neighbors (e.g. a length-1 read): mean/variance are undefined;
            // calcZScoreBin treats the zero variance as z == 0.
            return new Pair<Double,Double>(0.0, 0.0);
        }
        double mean = 0.0;
        for ( int i = start; i < end; i ++ ) {
            if ( i != offset ) {
                mean += ((double) quals[i])/n;
            }
        }
        double var = 0.0;
        for ( int i = start; i < end; i ++ ) {
            if ( i != offset ) {
                var += Math.pow(((double) quals[i] - mean),2)/n;
            }
        }
        return new Pair<Double,Double>(mean, var);
    }

    public String header() {
        String format = "%s\t%s\t%s\t%s\t%s\t%s%n";
        return String.format(format, "ZScore", "N_obs", "Expected_Mismatch", "Empirical_Mismatch", "Expected_MM_As_Q", "Empirical_MM_As_Q");
    }

    /** Formats one (z bin, quality) row of the output table. */
    public String formatData ( int[] matchMismatch, int zScoreBin, int q ) {
        String format = "%f\t%d\t%f\t%f\t%d\t%d%n";
        // BUGFIX: bin 0 corresponds to Z_SCORE_MIN, so the printed label must be
        // shifted; it was previously reported as bin/multiplier (0..12 not -6..6).
        double zScore = Z_SCORE_MIN + ( (double) zScoreBin )/Z_SCORE_MULTIPLIER;
        int counts = matchMismatch[MATCH_OFFSET] + matchMismatch[MM_OFFSET];
        // BUGFIX: empty bins used to compute 0/0 = NaN.
        double empMMR = (counts > 0) ? (((double)matchMismatch[MM_OFFSET])/counts) : 0.0;
        double expMMR = QualityUtils.qualToErrorProb((byte) q);
        int empMMRAsQ = QualityUtils.probToQual(1-empMMR);
        return String.format(format, zScore, counts, expMMR, empMMR, q, empMMRAsQ);
    }
}

View File

@ -1,139 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.playground.gatk.walkers.Recalibration.LocalMapType;
import net.sf.samtools.SAMRecord;
/**
 * Locus walker that bins every pileup base by how many of its neighboring base
 * qualities deviate from the center base's quality by a "small" (>= 5) or
 * "large" (>= 8) amount, and tabulates match/mismatch counts per bin so that
 * expected and empirical error rates can be compared.
 */
public class NQSCovariantByCountsWalker extends LocusWalker<LocalMapType, int[][][][] > {
    // Absolute quality-difference thresholds defining a "small" / "large" deviation.
    final static int SMALL_DEVIATION = 5;
    final static int LARGE_DEVIATION = 8;
    // Neighborhood half-width around the pileup base.
    final static int WIN_SIZE_SIDE = 6;
    // NOTE(review): MATCH/MM offsets are swapped relative to the sibling NQS
    // walkers (here match == 1); the code below is internally consistent with this.
    final static int MATCH_OFFSET = 1;
    final static int MM_OFFSET = 0;
    final static int MAX_Q_SCORE = 2 + QualityUtils.MAX_REASONABLE_Q_SCORE;
    final static String DATA_FORMAT = "%d\t%d\t%d\t%d\t%f\t%f\t%d\t%d%n";
    final static String HEADER_FORMAT = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n";
    // Full window width (2*side + 1); set in initialize(), unused elsewhere in this class.
    int NGHD_SIZE;

    public void initialize() {
        NGHD_SIZE = 2*WIN_SIZE_SIDE+1;
    }

    /** Allocates the zeroed [smallDevCount][largeDevCount][quality][mm|match] table. */
    public int[][][][] reduceInit() {
        int[][][][] q = new int[2*WIN_SIZE_SIDE][2*WIN_SIZE_SIDE][MAX_Q_SCORE][2];
        for ( int i = 0; i < 2*WIN_SIZE_SIDE; i ++ ) {
            for ( int j = 0; j < 2*WIN_SIZE_SIDE; j ++ ) {
                for ( int k = 0; k < MAX_Q_SCORE; k ++ ) {
                    for ( int h = 0; h < 2; h ++ ) {
                        q[i][j][k][h] = 0;
                    }
                }
            }
        }
        return q;
    }

    /** Bundles the per-locus inputs; all work happens in reduce(). */
    public LocalMapType map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
        return new LocalMapType(context, ref, tracker);
    }

    /** Adds each pileup base into its (small-dev, large-dev, quality) cell. */
    public int [][][][] reduce( LocalMapType map, int [][][][] quals ) {
        for ( int i = 0; i < map.numReads(); i ++ ) {
            SAMRecord read = map.context.getReads().get(i);
            int offset = map.context.getOffsets().get(i);
            Pair<Integer,Integer> deviantCounts = countNumDeviantBases(read,offset);
            if ( isMismatch(read,offset,map.ref) ) {
                quals[deviantCounts.first][deviantCounts.second][(int) read.getBaseQualities()[offset]][MM_OFFSET]++;
            } else {
                quals[deviantCounts.first][deviantCounts.second][(int) read.getBaseQualities()[offset]][MATCH_OFFSET]++;
            }
        }
        return quals;
    }

    public void onTraversalDone( int[][][][] counts ) {
        out.print( header() );
        for ( int i = 0; i < 2*WIN_SIZE_SIDE; i ++ ) {
            for ( int j = 0; j < 2*WIN_SIZE_SIDE; j ++ ) {
                out.print( formatData( counts[i][j], i, j ) );
            }
        }
    }

    /**
     * Counts neighbors whose quality deviates from the center base's quality by
     * at least SMALL_DEVIATION (but less than LARGE_DEVIATION) and by at least
     * LARGE_DEVIATION, as a (small, large) pair.  The center base itself is
     * scanned but always has deviation 0, so it never counts.
     * NOTE(review): end is exclusive, so the right half of the window covers one
     * base fewer than the left -- confirm this asymmetry is intended.
     */
    public Pair<Integer,Integer> countNumDeviantBases(SAMRecord read, int offset) {
        int start,end;
        if ( offset - WIN_SIZE_SIDE < 0 ) {
            start = 0;
        } else {
            start = offset - WIN_SIZE_SIDE;
        }
        if ( offset + WIN_SIZE_SIDE > read.getReadLength() ) {
            end = read.getReadLength();
        } else {
            end = offset + WIN_SIZE_SIDE;
        }
        int largeCounts = 0;
        int smallCounts = 0;
        byte[] quals = read.getBaseQualities();
        for ( int i = start; i < end; i ++ ) {
            if ( Math.abs(quals[i] - quals[offset]) >= LARGE_DEVIATION ) {
                largeCounts ++;
            } else if ( Math.abs( quals[i] - quals[offset] ) >= SMALL_DEVIATION ) {
                smallCounts ++;
            }
        }
        return new Pair<Integer,Integer>(smallCounts,largeCounts);
    }

    public boolean isMismatch( SAMRecord read, int offset, ReferenceContext ref ) {
        return ( Character.toUpperCase((char)read.getReadBases()[offset]) != ref.getBase() );
    }

    public String header() {
        return String.format(HEADER_FORMAT, "N_SMALL_DEV", "N_LARGE_DEV", "MATCH", "MISMATCH", "EXPECTED_ERR", "EMPIRICAL_ERR", "EXP_AS_Q", "EMP_AS_Q");
    }

    /** Aggregates one (small-dev, large-dev) cell over all qualities into a row. */
    public String formatData( int[][] qScores, int smDev, int lgDev ) {
        int match = 0;
        int mismatch = 0;
        double expErr = 0.0;
        for ( int i = 0; i < MAX_Q_SCORE; i ++ ) {
            match += qScores[i][MATCH_OFFSET];
            mismatch += qScores[i][MM_OFFSET];
            expErr += QualityUtils.qualToErrorProb((byte)i)*qScores[i][MATCH_OFFSET];
            expErr += QualityUtils.qualToErrorProb((byte)i)*qScores[i][MM_OFFSET];
        }
        // NOTE(review): an empty cell yields 0/0 -> NaN for expErr and empErr and
        // probToQual(NaN) downstream -- confirm consumers tolerate NaN rows.
        expErr = expErr/(match + mismatch);
        int expAsQ = QualityUtils.probToQual(1-expErr);
        double empErr = ((double)mismatch)/(match+mismatch);
        int empAsQ = QualityUtils.probToQual(1-empErr);
        return String.format(DATA_FORMAT, smDev, lgDev, match, mismatch, expErr, empErr, expAsQ, empAsQ);
    }
}

View File

@ -1,153 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.playground.gatk.walkers.Recalibration.LocalMapType;
/**
 * Tabulates reference match/mismatch counts by (reported quality, NQS group),
 * where an NQS group jointly encodes a base's quality and the minimum quality
 * in its +/- NEIGHBORHOOD_SIZE window (capped at the base's own quality).
 */
public class NQSExtendedGroupsCovariantWalker extends LocusWalker<LocalMapType, long[][][]> {
    final int NEIGHBORHOOD_SIZE = 5;  // window half-width around the pileup base
    final int MM_OFFSET = 1;          // middle-dimension slot for mismatch counts
    final int COUNT_OFFSET = 0;       // middle-dimension slot for total counts
    final int MAX_QSCORE = QualityUtils.MAX_REASONABLE_Q_SCORE+1;
    int NQS_GROUPS;
    public static final String DATA_FORMAT = "%d\t%d\t%d\t%d\t%d%n";
    public static final String TEXT_FORMAT = "%s\t%s\t%s\t%s\t%s%n";

    // Walker methods

    public void initialize() {
        NQS_GROUPS = (MAX_QSCORE)*(MAX_QSCORE+1)/2 + MAX_QSCORE;
        // this is from solving the recurrent equation
        // a_{n+1} = a_n + n + 1
    }

    /** Allocates the zeroed [nqsGroup][count|mismatch][quality] table. */
    public long[][][] reduceInit() {
        long[][][] covariantCounts = new long[NQS_GROUPS][2][MAX_QSCORE];
        for ( int i = 0; i < NQS_GROUPS; i ++ ) {
            for ( int j = 0; j < 2; j++ ) {
                for ( int k = 0; k < MAX_QSCORE; k ++) {
                    covariantCounts[i][j][k] = 0;
                }
            }
        }
        return covariantCounts;
    }

    /** Bundles the per-locus inputs; all work happens in reduce(). */
    public LocalMapType map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        return new LocalMapType(context, ref, tracker);
    }

    /** Adds every base of the pileup into its (group, quality) cell. */
    public long[][][] reduce( LocalMapType map, long[][][] cumulativeBins ) {
        for ( int i = 0; i < map.numReads(); i ++ ) {
            int groupNumber = calcGroupNumber(i, map.context);
            int qualityAsInt = getBaseQualityAsInt(map.context,i);
            cumulativeBins[groupNumber][COUNT_OFFSET][qualityAsInt] ++;
            if ( ! isRef(i, map.context, map.ref ) ) {
                cumulativeBins[groupNumber][MM_OFFSET][qualityAsInt] ++;
            }
        }
        return cumulativeBins;
    }

    public void onTraversalDone(long[][][] cumulativeBins) {
        printNQSMismatchTable(cumulativeBins);
    }

    // Map-Reduce helper methods

    public int calcGroupNumber( int read, AlignmentContext context ) {
        return calcGroupNumberFromQualities(getBaseQualityAsInt(context, read), getMinNeighborhoodBaseQualityAsInt(context, read));
    }

    /*
     * calculates group number as
     * #: nghd left base nghd right
     * 0: 000 0 000
     * 1: 000 1 000
     * 2: 111 1 111
     * 3: 000 2 000
     * 4: 111 2 111
     * 5: 222 2 222
     * 6: 000 3 000
     * 7: 111 3 111
     * etc
     */
    public int calcGroupNumberFromQualities( int baseQuality, int minNeighborhoodQuality ) {
        int groupNumber = (baseQuality)*(baseQuality+1)/2;
        if ( minNeighborhoodQuality <= baseQuality ) {
            groupNumber += minNeighborhoodQuality;
        } else {
            groupNumber += baseQuality; // yes we want to do this
        }
        return groupNumber;
    }

    public int getBaseQualityAsInt( AlignmentContext context, int read ) {
        // Clamp defensively: a quality above MAX_REASONABLE_Q_SCORE would index
        // past the MAX_QSCORE-wide quality dimension of the counts table.
        int rawQuality = (int) context.getReads().get(read).getBaseQualities()[context.getOffsets().get(read)];
        return Math.min(rawQuality, MAX_QSCORE - 1);
    }

    /** Minimum quality within the window, excluding the base itself. */
    public int getMinNeighborhoodBaseQualityAsInt( AlignmentContext context, int read ) {
        byte[] qualities = context.getReads().get(read).getBaseQualities();
        int offset = context.getOffsets().get(read);
        int readLength = qualities.length;
        // BUGFIX: the lower bound used to test (offset - NEIGHBORHOOD_SIZE < readLength),
        // which is almost always true, so the window silently started at 0 every time.
        int start = (offset - NEIGHBORHOOD_SIZE < 0) ? 0 : offset - NEIGHBORHOOD_SIZE;
        int end = (offset + NEIGHBORHOOD_SIZE > readLength) ? readLength : offset+NEIGHBORHOOD_SIZE;
        byte minQuality = Byte.MAX_VALUE;
        for ( int i = start; i < end; i ++ ) {
            if ( i != offset ) {
                if ( qualities[i] < minQuality ) {
                    minQuality = qualities[i];
                }
            }
        }
        // With no neighbors (length-1 read) this stays Byte.MAX_VALUE, which
        // calcGroupNumberFromQualities caps at the base's own quality.
        return (int) minQuality;
    }

    public boolean isRef(int read, AlignmentContext context, ReferenceContext ref ) {
        // NOTE(review): raw byte-vs-char comparison with no case normalization,
        // unlike sibling walkers that use Character.toUpperCase(); confirm the
        // inputs are consistently upper-case.
        return (context.getReads().get(read).getReadBases()[context.getOffsets().get(read)] == ref.getBase());
    }

    // printing helper methods

    public void printNQSMismatchTable(long[][][] cumulativeBins) {
        out.print(createHeader());
        for ( int qscore = 0; qscore < MAX_QSCORE; qscore ++ ) {
            for ( int group = 0; group < NQS_GROUPS; group ++ ) {
                out.print(formatNQSMismatchCountString(qscore,group,cumulativeBins));
            }
        }
    }

    public String createHeader() {
        return String.format(TEXT_FORMAT, "Qscore_Reported", "NQS_Group", "Mismatches", "Total", "Empirical_Qscore");
    }

    /** Formats one (quality, group) row of the output table. */
    public String formatNQSMismatchCountString(int qscore, int group, long[][][] cumulativeBins) {
        long mm = cumulativeBins[group][MM_OFFSET][qscore];
        long ct = cumulativeBins[group][COUNT_OFFSET][qscore];
        // BUGFIX: mm/ct was a long integer division that truncated every rate
        // below 1.0 to 0; empty cells now report a zero rate instead of feeding
        // the -1 sentinel (probability 2) into probToQual.
        double mmr = (ct > 0) ? ((double) mm)/ct : 0.0;
        return String.format(DATA_FORMAT, qscore, group, mm, ct, QualityUtils.probToQual(1-mmr));
    }
}

View File

@ -1,188 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.playground.gatk.walkers.Recalibration.LocalMapType;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.QualityUtils;
import net.sf.samtools.SAMRecord;
/**
 * Bins non-dbSNP pileup bases by reported quality and by an "NQS group" --
 * the number of quality-threshold steps cleared by both the base and the
 * minimum quality in its neighborhood window -- and reports mismatch vs.
 * total counts for each (quality, group) cell.
 */
public class NQSMismatchCovariantWalker extends LocusWalker<LocalMapType, long[][][]> {
    public static final int NQS_GROUPS = 35;        // highest group index emitted
    public static final int NQS_RESOLUTION = 1;     // quality step per group
    public static final int NQS_DIFFERENCE = 5;     // neighborhood threshold sits this far below the center threshold
    public static final int NEIGHBORHOOD_SIZE = 5;  // window half-width
    public static final int NQS_QSCORE_START = 5;   // center-base quality threshold of group 0
    public static final int MM_OFFSET = 0;
    public static final int COUNT_OFFSET = 1;
    public static final int FAIL_OFFSET = 2;        // declared but unused below
    public static final String DATA_FORMAT = "%d %d %d %d %d %d%n";
    public static final String HEADER_FORMAT = "%s %s %s %s %s %s%n";

    /** Allocates the zeroed [quality][nqsGroup][mm|count] table. */
    public long[][][] reduceInit() {
        long[][][] mismatchesByNQS = new long[(int) QualityUtils.MAX_QUAL_SCORE+1][this.numNQSGroups()+1][2];
        for ( int qualityScore = 0; qualityScore <= (int) QualityUtils.MAX_QUAL_SCORE; qualityScore ++) {
            for ( int nqsGroup = 0; nqsGroup <= numNQSGroups(); nqsGroup++ ) {
                mismatchesByNQS[qualityScore][nqsGroup][MM_OFFSET] = 0;
                mismatchesByNQS[qualityScore][nqsGroup][COUNT_OFFSET] = 0;
            }
        }
        return mismatchesByNQS;
    }

    /** Bundles the per-locus inputs; all work happens in reduce(). */
    public LocalMapType map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        return new LocalMapType(context,ref,tracker);
    }

    /** Adds each non-dbSNP pileup base into its (quality, NQS group) cell. */
    public long[][][] reduce(LocalMapType rawMapData, long[][][] cumMismatchArray) {
        if ( ! isDBSNP(rawMapData.tracker) ) {
            for( int read = 0; read < rawMapData.numReads(); read ++ ) {
                int bestNQSRelativeToStart = getBestNQSRelativeToStart(rawMapData.context, read, nqsDifference(),
                        nqsResolution(), centerBaseQualityStart(), windowSize());
                if( isValidRelativeNQS(bestNQSRelativeToStart) ) {
                    cumMismatchArray[rawMapData.qScore(read)][bestNQSRelativeToStart][COUNT_OFFSET]++;
                    if( isMismatch(read, rawMapData.context, rawMapData.ref) ) {
                        cumMismatchArray[rawMapData.qScore(read)][bestNQSRelativeToStart][MM_OFFSET]++;
                    }
                }
            }
        }
        return cumMismatchArray;
    }

    public void onTraversalDone(long[][][] cumulativeCounts) {
        printNQSMismatchTable(cumulativeCounts);
    }

    /**
     * Number of threshold steps (relative to the starting cutoff) cleared by
     * BOTH the center base's quality and the minimum neighborhood quality;
     * negative when the base fails even the first cutoff.
     */
    public int getBestNQSRelativeToStart(AlignmentContext context, int readNum, int ctrNghdNQSDif, int nqsStepSize,
            int ctrBaseQStart, int windSize) {
        int minNeighborhoodQualityScore = findMinQScoreInWindow(context, readNum, windSize);
        int centerQScore = getQualityScore(context,readNum);
        int stepsToCutoffNeighborhood = (int) Math.floor((minNeighborhoodQualityScore - (ctrBaseQStart - ctrNghdNQSDif))/nqsStepSize);
        int stepsToCutoffCenter = (int) Math.floor((centerQScore-ctrBaseQStart )/nqsStepSize);
        int best = Math.min(stepsToCutoffNeighborhood, stepsToCutoffCenter);
        // BUGFIX: high-quality bases (q > start + NQS_GROUPS) used to index past
        // the [numNQSGroups()+1]-wide table in reduce(); saturate into the last
        // group instead of crashing.
        return Math.min(best, numNQSGroups());
    }

    protected void printNQSMismatchTable(long[][][] cumulativeCounts) {
        out.printf("%s", createHeader() );
        for ( int qscore = 0; qscore <= QualityUtils.MAX_QUAL_SCORE; qscore ++ ) {
            for( int nqsGroup = 0; nqsGroup <= numNQSGroups(); nqsGroup ++ ) {
                out.printf("%s", formatNQSMismatchCountString(cumulativeCounts, nqsGroup, qscore));
            }
        }
    }

    protected String createHeader() {
        return String.format(HEADER_FORMAT,"Qscore Reported","Qscore Threshold at Locus", "Minimum Neighborhood Quality", "NQS Group",
                "# Non-dbSNP Mismatches", "Total Non-dbSNP Counts");
    }

    protected String formatNQSMismatchCountString(long[][][] counts, int nqsGroup, int qscore) {
        return String.format(DATA_FORMAT,qscore,getCenterThreshold(nqsGroup),getNeighborhoodThreshold(nqsGroup),nqsGroup,
                counts[qscore][nqsGroup][MM_OFFSET], counts[qscore][nqsGroup][COUNT_OFFSET]);
    }

    protected int getCenterThreshold(int nqsGroup) {
        return centerBaseQualityStart() + nqsGroup*nqsResolution();
    }

    protected int getNeighborhoodThreshold(int nqsGroup) {
        return getCenterThreshold(nqsGroup)-nqsDifference();
    }

    protected int numNQSGroups() {
        return NQS_GROUPS;
    }

    protected int nqsResolution() {
        return NQS_RESOLUTION;
    }

    protected int nqsDifference() {
        return NQS_DIFFERENCE;
    }

    protected boolean isValidRelativeNQS(int relNQS) {
        return relNQS >= 0;
    }

    protected boolean isDBSNP(RefMetaDataTracker tracker) {
        // dbSNP filtering is deliberately disabled here (lookup kept for reference).
        return false;
        // return ( tracker.lookup("dbSNP",null) != null );
    }

    protected boolean isMismatch(int readNum, AlignmentContext context, ReferenceContext ref) {
        // NOTE(review): no case normalization, unlike sibling walkers that use
        // Character.toUpperCase(); confirm read/reference bases are upper-case.
        return ( (char) getBases(context,readNum)[ getOffset(context,readNum) ] ) != ref.getBase();
    }

    protected byte[] getBases(AlignmentContext context, int offset) {
        return context.getReads().get(offset).getReadBases();
    }

    protected int getOffset(AlignmentContext context, int read) {
        return context.getOffsets().get(read);
    }

    protected int windowSize() {
        return NEIGHBORHOOD_SIZE;
    }

    protected int centerBaseQualityStart() {
        return NQS_QSCORE_START;
    }

    protected int getQualityScore(AlignmentContext context, int readNo) {
        return getQualityScore(context.getReads().get(readNo), context.getOffsets().get(readNo));
    }

    protected int getQualityScore(SAMRecord read, int offset) {
        return (int) read.getBaseQualities()[offset];
    }

    /**
     * Minimum quality within windowSize of the read's pileup offset.
     * NOTE(review): unlike the sibling walkers, the center base itself is
     * included in this minimum -- confirm that is intended.
     */
    protected int findMinQScoreInWindow(AlignmentContext context, int readNo, int windowSize) {
        SAMRecord read = context.getReads().get(readNo);
        int offset = context.getOffsets().get(readNo);
        int start;
        int end;
        if ( offset - windowSize < 0) {
            start = 0;
        } else {
            start = offset - windowSize;
        }
        if ( offset + windowSize > read.getReadLength() ) {
            end = read.getReadLength();
        } else {
            end = offset + windowSize;
        }
        int minQualityScore = Integer.MAX_VALUE;
        for (int positionInRead = start; positionInRead < end; positionInRead++ ) {
            int qScoreAtPosition = getQualityScore(read,positionInRead);
            if ( qScoreAtPosition < minQualityScore ) {
                minQualityScore = qScoreAtPosition;
            }
        }
        return minQualityScore;
    }
}

View File

@ -1,160 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Pair;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.StringTokenizer;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
/**
 * Read walker that rewrites each read's base qualities using an NQS
 * recalibration table keyed on (reported Q, window-min Q, window-max Q),
 * loaded from a whitespace-separated file, and writes the recalibrated reads
 * to a BAM (or to stdout when no output BAM is given).
 */
public class NQSRecalibratorWalker extends ReadWalker<SAMRecord,SAMFileWriter> {
    @Argument(fullName="recalibrationTable", shortName="rt", doc="Table detailing NQS recalibration by reported, minimum, and maximum", required=true)
    String recalFile = null;
    @Argument(fullName="outputBamFile", shortName="outputBAM", doc="output BAM file", required=false)
    public SAMFileWriter outBam = null;
    // Rows with this many observations or fewer are ignored (identity mapping kept).
    final static int MIN_RECALIBRATION_OBSERVATIONS = 10000;
    // Window half-width used when computing the min/max neighborhood quality.
    final static int WIN_SIDE = 11;
    final static int QMAX = 3 + QualityUtils.MAX_REASONABLE_Q_SCORE;
    // [reported][min][max] -> recalibrated quality; defaults to identity.
    protected byte[][][] RECALIBRATION_TABLE;

    /** Loads the recalibration table from recalFile, keeping identity for sparse rows. */
    public void initialize() {
        // initialize the table
        RECALIBRATION_TABLE = initializeQualityRecalibrationTable();
        BufferedReader reader;
        try {
            reader = new BufferedReader(new FileReader(recalFile));
        } catch (FileNotFoundException e) {
            throw new StingException("File "+recalFile+" not found",e);
        }
        try {
            String header = reader.readLine(); // first line is a column header, not data
            logger.debug(header);
            while( reader.ready() ) {
                // Expected columns: repQ minQ maxQ nObs <empirical mismatch rate> empQ
                StringTokenizer tok = new StringTokenizer(reader.readLine());
                int repQ = Integer.valueOf(tok.nextToken());
                int minQ = Integer.valueOf(tok.nextToken());
                int maxQ = Integer.valueOf(tok.nextToken());
                int nObserv = Integer.valueOf(tok.nextToken());
                tok.nextToken(); // skip one - empirical mismatch rate
                int empQ = Integer.valueOf(tok.nextToken());
                // NOTE(review): repQ/minQ/maxQ are not range-checked; a row outside
                // [0, QMAX) would throw ArrayIndexOutOfBounds here -- confirm the
                // table generator guarantees these bounds.
                if ( nObserv > MIN_RECALIBRATION_OBSERVATIONS ) {
                    RECALIBRATION_TABLE[repQ][minQ][maxQ] = (byte) empQ;
                }
            }
        } catch (IOException e) {
            throw new StingException("File "+recalFile+" could not be read",e);
        }
    }

    public SAMFileWriter reduceInit() {
        return outBam;
    }

    /** Recalibrates the read's qualities; the reference bases are unused. */
    public SAMRecord map(char[] ref, SAMRecord read) {
        byte[] initialQualities = read.getBaseQualities();
        byte[] finalQualities = getNewQualitiesByNQS(initialQualities);
        return recalibrateRead(read, finalQualities);
    }

    /** Computes new qualities position by position from the ORIGINAL quality array. */
    public byte[] getNewQualitiesByNQS( byte [] initQuals ) {
        byte [] newQuals = new byte[initQuals.length];
        for ( int offset = 0; offset < initQuals.length; offset ++ ) {
            newQuals[offset] = qualityByNQS(initQuals,offset);
        }
        return newQuals;
    }

    /** Writes the read to the BAM, or prints it to stdout when no writer is set. */
    public SAMFileWriter reduce( SAMRecord newRead, SAMFileWriter outwriter ) {
        if ( outwriter == null ) {
            out.println(newRead);
        } else {
            outwriter.addAlignment(newRead);
        }
        return outwriter;
    }

    public byte qualityByNQS( byte [] quals, int offset ) {
        Pair<Byte,Byte> minMaxQuality = getMinMaxQuality(quals,offset);
        return nqsLookup( quals[offset] , minMaxQuality.getFirst() , minMaxQuality.getSecond() );
    }

    /**
     * (min, max) quality within WIN_SIDE of offset, excluding offset itself.
     * NOTE(review): with no neighbors (a length-1 read) this returns
     * (Byte.MAX_VALUE, Byte.MIN_VALUE), which would index out of bounds in
     * nqsLookup -- confirm reads are always longer than one base.
     */
    public Pair<Byte,Byte> getMinMaxQuality( byte [] quals, int offset ) {
        int start;
        int end; // exclusive
        if ( offset-WIN_SIDE < 0 ) {
            start = 0;
        } else {
            start = offset - WIN_SIDE;
        }
        if ( offset + WIN_SIDE > quals.length ) {
            end = quals.length;
        } else {
            end = offset + WIN_SIDE;
        }
        byte minQuality = Byte.MAX_VALUE;
        byte maxQuality = Byte.MIN_VALUE;
        for ( int i = start; i < end; i ++ ) {
            if ( i != offset ) {
                if ( quals[i] < minQuality ) {
                    minQuality = quals[i];
                }
                if ( quals[i] > maxQuality ) {
                    maxQuality = quals[i];
                }
            }
        }
        return new Pair<Byte,Byte>(minQuality,maxQuality);
    }

    public byte nqsLookup( byte repQ, byte minQ, byte maxQ ) {
        return RECALIBRATION_TABLE[(int) repQ][(int) minQ][(int) maxQ];
    }

    /** Mutates the read in place with the new qualities and returns it. */
    public SAMRecord recalibrateRead( SAMRecord read, byte[] newQualities ) {
        read.setBaseQualities(newQualities);
        return read;
    }

    /** Builds the identity table: every (rep, min, max) maps to rep. */
    private byte[][][] initializeQualityRecalibrationTable() {
        byte [][][] table = new byte[QMAX][QMAX][QMAX];
        for ( int qrep = 0; qrep < QMAX; qrep ++ ) {
            for ( int qmin = 0; qmin < QMAX; qmin ++ ) {
                for ( int qmax = 0; qmax < QMAX; qmax ++ ) {
                    table[qrep][qmin][qmax] = (byte) qrep;
                    // default value is the reported q-score
                }
            }
        }
        return table;
    }
}

View File

@ -1,181 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Pair;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import org.apache.log4j.Logger;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: Ghost
* Date: Sep 24, 2009
* Time: 10:40:29 AM
* To change this template use File | Settings | File Templates.
*/
/**
 * Walker that tabulates the NQS (neighborhood quality standard) distribution:
 * for every combination of binned quality scores across a 7-base window, it
 * counts how often the central base matched or mismatched the reference, then
 * dumps the full table on traversal completion.
 */
public class NQSTabularDistributionWalker extends LocusWalker<LocalMapType, NQSDistributionTable> {
    // Number of neighboring bases on each side of the pileup base.
    final int WINDOW_SIZE_SIDE = 3;
    // Width, in phred units, of each quality-score bin (see getQscoreBin).
    final int QSCORE_BIN_SIZE = 4;
    // Tab-separated row: seven binned qualities, then mismatch and match counts.
    final String ROW_FORMAT = "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d%n";
    final String HEAD_FORMAT = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n";
    int qBins;       // number of quality bins per axis, including two sentinel bins
    int WINDOW_SIZE; // full window width: 2 * WINDOW_SIZE_SIDE + 1

    public void initialize() {
        // The extra 2 bins are sentinels for "off the end" / "off the start" of
        // a read (see NQSDistributionTable.getQScoreAsInt).
        qBins = 2 + (int) Math.ceil(((double)QualityUtils.MAX_REASONABLE_Q_SCORE)/QSCORE_BIN_SIZE);
        WINDOW_SIZE = 2*WINDOW_SIZE_SIDE+1;
    }

    public NQSDistributionTable reduceInit() {
        return new NQSDistributionTable(WINDOW_SIZE, qBins, QSCORE_BIN_SIZE, logger);
    }

    public LocalMapType map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        // No per-locus work here; reduce() consumes the raw pileup context.
        return new LocalMapType(context, ref, tracker);
    }

    public NQSDistributionTable reduce( LocalMapType curMap, NQSDistributionTable prevReduce ) {
        // iterate through reads, folding each (read, offset) into the table
        for ( int i = 0; i < curMap.numReads(); i ++ ) {
            prevReduce.update(curMap.context.getReads().get(i), curMap.context.getOffsets().get(i), WINDOW_SIZE_SIDE, QSCORE_BIN_SIZE, curMap.ref);
        }
        return prevReduce;
    }

    public void onTraversalDone(NQSDistributionTable finalTable) {
        // Dump every cell of the 7-dimensional table, one row per coordinate,
        // in lexicographic order of (a..g).
        out.print( makeHeader() );
        for ( int a = 0; a < qBins; a ++ ) {
            for ( int b = 0; b < qBins; b ++ ) {
                for ( int c = 0; c < qBins; c ++ ) {
                    for ( int d = 0; d < qBins; d ++ ) {
                        for ( int e = 0; e < qBins; e ++ ) {
                            for ( int f = 0; f < qBins; f ++ ) {
                                for ( int g = 0; g < qBins; g ++ ) {
                                    Pair<Integer,Integer> mm = finalTable.getPair(a,b,c,d,e,f,g);
                                    out.print( formatOutput(a,b,c,d,e,f,g,mm.first,mm.second) );
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    /** @return the tab-separated column header line. */
    public String makeHeader() {
        return String.format(HEAD_FORMAT,"Q1","Q2","Q3","Q4","Q5","Q6","Q7","Mismatch","Match");
    }

    /** Formats one table row: seven bin coordinates plus mismatch/match counts. */
    public String formatOutput( int a, int b, int c, int d, int e, int f, int g, int h, int i ) {
        return String.format(ROW_FORMAT, a, b, c, d, e, f, g, h, i);
    }
}
/**
 * 7-dimensional count table over binned quality scores: one axis per base in
 * the NQS window, plus a final axis of size 2 distinguishing mismatch
 * (index 0) from match (index 1) of the central base against the reference.
 */
class NQSDistributionTable {
    public final int MM_OFFSET = 0;     // final-axis index for mismatch counts
    public final int MATCH_OFFSET = 1;  // final-axis index for match counts
    protected int [][][][][][][][] table;
    protected Logger logger;
    protected int OFF_END_OFFSET;   // sentinel bin: window position past the end of the read
    protected int OFF_START_OFFSET; // sentinel bin: window position before the start of the read

    /**
     * @param winSize full window size; must be 7 to match the table's dimensionality
     * @param qBins   number of quality bins per axis, including the two sentinels
     * @param qStep   quality bin width (not used by the constructor itself)
     * @param logger  logger used for debug output in getQscoreCoordinates
     */
    public NQSDistributionTable (int winSize, int qBins, int qStep, Logger logger) {
        if ( 7 != winSize ) {
            // NOTE(review): "positied" in this message looks like a typo for "posited".
            throw new StingException("Size positied in tabular distribution is not the size of the distribution table");
        }
        this.logger = logger;
        table = new int[qBins][qBins][qBins][qBins][qBins][qBins][qBins][2];
        // yes, this is absolutely positively horrendous brute-force code.
        // ... Knuth would be proud. These could be lists for a decrease
        // in memory but a slowdown in building the distribution.
        // (NOTE(review): Java already zero-initializes int arrays, so this
        // explicit fill is redundant but harmless.)
        for ( int a = 0; a < qBins; a ++ ) {
            for ( int b = 0; b < qBins; b ++ ) {
                for ( int c = 0; c < qBins; c ++ ) {
                    for ( int d = 0; d < qBins; d ++ ) {
                        for ( int e = 0; e < qBins; e ++ ) {
                            for ( int f = 0; f < qBins; f ++ ) {
                                for ( int g = 0; g < qBins; g ++ ) {
                                    for ( int h = 0; h < 2; h ++ ) {
                                        table[a][b][c][d][e][f][g][h] = 0;
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
        // The last two bins serve as the off-read sentinels.
        OFF_END_OFFSET = qBins-2;
        OFF_START_OFFSET = qBins-1;
    }

    /**
     * Increments the table cell addressed by the binned qualities of the window
     * around (read, offset), in the match or mismatch plane as appropriate.
     */
    public void update( SAMRecord read, int offset, int winSize, int binSize, ReferenceContext ref ) {
        // note: there are two extra bins, one for "No quality: off the end of read"
        // and one for "No quality: off the beginning of read"
        List<Integer> coordinates = getQscoreCoordinates(read,offset,winSize, binSize);
        if ( isMatch(read, offset, ref) ) {
            coordinates.add(MATCH_OFFSET);
        } else {
            coordinates.add(MM_OFFSET);
        }
        ListIterator<Integer> o = coordinates.listIterator();
        // Relies on Java's guaranteed left-to-right evaluation of array index
        // expressions: the eight o.next() calls consume the coordinates in order.
        table[o.next()][o.next()][o.next()][o.next()][o.next()][o.next()][o.next()][o.next()] ++;
        // i'm sure i could use recursion for this but my brain is too tired after swimming to figure
        // out, in 20 seconds, how to do it
    }

    /**
     * @return the binned quality of each position in [offset-win, offset+win],
     *         in window order (off-read positions map to the sentinel bins).
     */
    public List<Integer> getQscoreCoordinates( SAMRecord read, int offset, int win, int binSize ) {
        LinkedList<Integer> coords = new LinkedList<Integer>();
        for ( int i = offset - win; i <= offset + win ; i ++ ) {
            coords.add(getQscoreBin(getQScoreAsInt(read,i), binSize));
        }
        logger.debug(Integer.toString(coords.size()));
        return coords;
    }

    /**
     * @return the quality at the given read offset, or a sentinel bin index when
     *         the offset falls off the read (start/end swapped for negative-strand
     *         reads so sentinels reflect machine order rather than reference order).
     */
    public int getQScoreAsInt( SAMRecord read, int offset ) {
        int qscore;
        if ( offset < 0 ) {
            qscore = read.getReadNegativeStrandFlag() ? OFF_END_OFFSET : OFF_START_OFFSET;
        } else if ( offset >= read.getReadLength() ) {
            qscore = read.getReadNegativeStrandFlag() ? OFF_START_OFFSET : OFF_END_OFFSET;
        } else {
            qscore = (int) read.getBaseQualities()[offset];
        }
        return qscore;
    }

    /** Maps a phred score to its bin: floor(qscore / binSize). */
    public int getQscoreBin( int qscore, int binSize ) {
        return (int) Math.floor(((double)qscore)/binSize);
    }

    /** @return (mismatch count, match count) for the given window coordinate. */
    public Pair<Integer,Integer> getPair( int a, int b, int c, int d, int e, int f, int g ) {
        int mm = table[a][b][c][d][e][f][g][MM_OFFSET];
        int ma = table[a][b][c][d][e][f][g][MATCH_OFFSET];
        return new Pair<Integer,Integer>(mm,ma);
    }

    /**
     * A base counts as a match when it equals the reference base, or when the
     * reference is 'N' (no-call reference positions never count as mismatches).
     * NOTE(review): the read base is upper-cased but the reference base is not —
     * a lower-case reference base would always compare unequal; confirm the
     * reference provider guarantees upper case.
     */
    public boolean isMatch( SAMRecord read, int offset, ReferenceContext ref ) {
        return ( Character.toUpperCase( (char) read.getReadBases()[offset] ) == ref.getBase() || ref.getBase() == 'N');
    }
}

View File

@ -1,77 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Pair;
import java.util.List;
import java.util.LinkedList;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Sep 28, 2009
* Time: 12:02:33 PM
* To change this template use File | Settings | File Templates.
*/
/**
 * Tabulates the distribution of base quality scores by position within the read
 * (machine cycle): for every (cycle, quality) pair, counts how many pileup bases
 * were observed with that quality at that cycle. Negative-strand reads have
 * their offsets mirrored so cycles are counted in machine order.
 */
public class QualityScoreDistributionWalker extends LocusWalker<List<Pair<Integer,Integer>>, int[][]> {

    @Argument(fullName="maxReadLength", shortName="rl", doc="Maximum read length in the bam file", required=true)
    protected int MAX_READ_LENGTH = 125;

    // Headroom of 3 above the library's maximum reasonable phred score so that
    // slightly out-of-range qualities still index into the count table.
    protected final int MAX_REASONABLE_Q_SCORE = 3 + QualityUtils.MAX_REASONABLE_Q_SCORE;

    public void initialize() {
        // no setup required
    }

    /**
     * @return a zeroed MAX_READ_LENGTH x MAX_REASONABLE_Q_SCORE table indexed
     *         [cycle][quality]. Java zero-initializes int arrays, so no
     *         explicit fill loop is needed.
     */
    public int[][] reduceInit() {
        return new int[MAX_READ_LENGTH][MAX_REASONABLE_Q_SCORE];
    }

    /**
     * Maps each read overlapping this locus to a (cycle, quality) pair. Cycle is
     * the 0-based position in machine order: the alignment offset for forward
     * reads, mirrored from the read's end for negative-strand reads.
     */
    public List<Pair<Integer,Integer>> map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
        LinkedList<Pair<Integer,Integer>> qualByPos = new LinkedList<Pair<Integer,Integer>>();
        for ( int r = 0; r < context.numReads(); r ++ ) {
            final int offset = context.getOffsets().get(r);
            final int qu = context.getReads().get(r).getBaseQualities()[offset];
            final int of;
            if ( context.getReads().get(r).getReadNegativeStrandFlag() ) {
                // BUGFIX: was readLength - offset, which yields cycles in
                // [1, readLength] and can overflow counts[] when offset == 0;
                // valid 0-based cycles are [0, readLength - 1].
                of = context.getReads().get(r).getReadLength() - 1 - offset;
            } else {
                of = offset;
            }
            qualByPos.add(new Pair<Integer,Integer>(of, qu));
        }
        return qualByPos;
    }

    /** Folds this locus's (cycle, quality) observations into the count table. */
    public int[][] reduce( List<Pair<Integer,Integer>> qualByPos, int[][] counts ) {
        for ( Pair<Integer,Integer> qPosPair : qualByPos ) {
            counts[qPosPair.getFirst()][qPosPair.getSecond()] ++;
        }
        return counts;
    }

    /** Prints one tab-separated row per cycle: position, then every quality count. */
    public void onTraversalDone( int[][] counts ) {
        for ( int pos = 0; pos < MAX_READ_LENGTH; pos ++ ) {
            // StringBuilder avoids quadratic string concatenation in the loop.
            StringBuilder outStr = new StringBuilder(Integer.toString(pos));
            for ( int qual = 0; qual < MAX_REASONABLE_Q_SCORE; qual ++) {
                outStr.append('\t').append(counts[pos][qual]);
            }
            out.printf("%s%n", outStr);
        }
    }
}

View File

@ -1,218 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.playground.utils.PoolUtils;
import org.broadinstitute.sting.playground.utils.ReadOffsetQuad;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.StringTokenizer;
import java.util.NoSuchElementException;
import java.util.List;
import java.rmi.NoSuchObjectException;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Aug 25, 2009
* Time: 3:47:47 PM
* To change this template use File | Settings | File Templates.
*/
@By(DataSource.REFERENCE)
public class AnalyzePowerWalker extends CoverageAndPowerWalker{
    // runs CoverageAndPowerWalker except compares to Syzygy outputs in a file
    @Argument(fullName = "SyzygyOutputFile", shortName = "sof", doc="Syzygy output file to compare to", required = true)
    String pathToSyzygyFile = null;
    @Argument(fullName = "ColumnOffset", shortName = "co", doc = "Offset of column containing the power in the pf", required = true)
    int colOffset = 0;
    BufferedReader syzyFileReader;
    final String pfFileDelimiter = " ";
    boolean outOfLinesInSyzyFile = false;
    // Power values parsed from the Syzygy file, indexed [chromIndex][posIndex]
    // (see getTableChromIndex / getTablePosIndex for the hard-coded mapping).
    private double[][] syzyPowerTable;
    // Parallel table of "chrm<N>:<pos>" strings, used only to sanity-check
    // that the table lookup hit the intended locus.
    private String[][] syzySanityCheckTable;

    /**
     * Opens the Syzygy file and pre-loads the whole power table into memory.
     * NOTE(review): the 8 x 1,500,000 table dimensions are hard-coded to the
     * specific pools this walker was written for (see the TODO on
     * generatePowerTable).
     */
    @Override
    public void initialize()
    {
        super.initialize();
        syzyPowerTable = new double[8][1500000];
        syzySanityCheckTable = new String[8][1500000];
        try {
            syzyFileReader = new BufferedReader(new FileReader(pathToSyzygyFile));
            logger.error("generatePowerTable called");
            generatePowerTable(syzyFileReader);
        } catch (FileNotFoundException e) {
            String newErrMsg = "Syzygy input file " + pathToSyzygyFile + " could be incorrect. File not found.";
            throw new StingException(newErrMsg,e);
        } catch (IOException e) {
            String newErrMsg = "Syzygy input file error: could not read first line of "+pathToSyzygyFile;
            throw new StingException(newErrMsg,e);
        }
    }

    /**
     * Same as the superclass map, but appends the pre-loaded Syzygy power (and a
     * sanity-check locus string) to each locus line.
     * NOTE(review): the returned pair uses the forward-read count twice — the
     * superclass returns (forward, reverse); presumably a copy/paste slip.
     */
    @Override
    public Pair<Integer,Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext rawContext)
    {
        AlignmentContext context;
        if (super.getMinQualityScore() <= 0) {
            context = rawContext;
        } else {
            // Drop bases below the quality threshold before computing power.
            Pair<List<SAMRecord>,List<Integer>> readsFilteredByQuality = filterByQuality(rawContext.getReads(),rawContext.getOffsets(), super.getMinQualityScore());
            context = new AlignmentContext(rawContext.getLocation(),readsFilteredByQuality.getFirst(),readsFilteredByQuality.getSecond());
        }
        ReadOffsetQuad readsByDirection = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
        // Repackage the quad as the nested-pair shape the superclass helpers expect.
        Pair<Pair<List<SAMRecord>, List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> splitReads = new Pair(new Pair(readsByDirection.getFirstReads(),readsByDirection.getSecondReads()),new Pair(readsByDirection.getFirstOffsets(),readsByDirection.getSecondOffsets()));
        if ( !super.suppress_printing )
        {
            Pair<double[],byte[]> powPair = super.calculatePower(splitReads,false,context);
            int tabIndexChrom = getTableChromIndex(context.getLocation().toString());
            int tabIndexLoc = getTablePosIndex(context.getLocation().toString());
            double syzyPow = 0;
            String syzySanity = "NoLoc"; // kept when the locus is outside the table
            if(tabIndexChrom >= 0 && tabIndexLoc >= 0) {
                syzyPow += syzyPowerTable[tabIndexChrom][tabIndexLoc];
                syzySanity = "s " + syzySanityCheckTable[tabIndexChrom][tabIndexLoc];
            }
            out.printf("%s|%s: %d %d %d %d %d %d %f %f %f %f %n", context.getLocation(), syzySanity, splitReads.getFirst().getFirst().size(), splitReads.getFirst().getSecond().size(),
                    context.getReads().size(), powPair.getSecond()[0], powPair.getSecond()[1], powPair.getSecond()[2],
                    powPair.getFirst()[0], powPair.getFirst()[1], powPair.getFirst()[2], syzyPow);
        }
        return new Pair(splitReads.getFirst().getFirst().size(), splitReads.getFirst().getFirst().size());
    }

    /**
     * Reads one line from the Syzygy file and returns (power, "chrom:pos").
     * On EOF/IO error returns a (-1.1, "Printing Stops Here") sentinel and sets
     * outOfLinesInSyzyFile so callers can stop asking.
     */
    public Pair<Double,String> getSyzyPowFromFile() {
        String thisLine = null;
        try {
            thisLine = syzyFileReader.readLine();
        } catch(IOException e) {
            String newErrMsg = "Ran out of lines in the syzyfile; further output of Syzygy power will be suppressed.";
            outOfLinesInSyzyFile=true;
            logger.warn(newErrMsg + " " + e.toString());
            return new Pair(-1.1, "Printing Stops Here");
        }
        String chromPosTar = null;
        StringTokenizer lineTokenizer = new StringTokenizer(thisLine, pfFileDelimiter);
        try {
            chromPosTar = lineTokenizer.nextToken();
            // Skip forward to the configured power column.
            for(int j = 1; j < colOffset; j++) {
                lineTokenizer.nextToken();
            }
            String chromPos = (new StringTokenizer(chromPosTar, "\t")).nextToken();
            // Syzygy reports power as a percentage; convert to a proportion.
            return new Pair((Double.valueOf(lineTokenizer.nextToken())/100.0),chromPos);
        } catch (NoSuchElementException e) {
            String errMsg = "The given column offset for the pool, " + colOffset + " exceeded the number of entries in the file " + pathToSyzygyFile;
            throw new StingException(errMsg);
        }
    }

    /** Appends the Syzygy power column to the superclass header. */
    public String createHeaderString() {
        return (super.createHeaderString() + " PowSyz");
    }

    //TODO: rewrite this so it's not a complete hack!!!
    /**
     * Parses the whole Syzygy file ("chrm<N>:<pos> ... power ...") into
     * syzyPowerTable / syzySanityCheckTable. Stops silently at the first blank
     * or malformed line, and on IOException (logged to stdout only).
     */
    public void generatePowerTable(BufferedReader syzFile) {
        System.out.println("generatePowerTable entered");
        int numreads = 0;
        try{
            while(syzFile.ready()) {
                String line = syzFile.readLine();
                if(line == null || line.equals("")) {
                    break;
                }
                StringTokenizer tok = new StringTokenizer(line, " :\t");
                String chrmNoStrWithChr = tok.nextToken();
                String chrmNoStr = chrmNoStrWithChr.substring(3); // strip leading "chr"
                String locStr = tok.nextToken();
                // Skip to the power column (chrom and pos already consumed).
                for(int j = colOffset - 2; j > 0; j --) {
                    tok.nextToken();
                }
                numreads++;
                String syzyPowStr = tok.nextToken();
                if ( chrmNoStr == null || locStr == null || chrmNoStr.equals("") || locStr.equals("")) {
                    break;
                }
                int chrIndex = getTableChromIndex(chrmNoStr,locStr);
                int posIndex = getTablePosIndex(chrmNoStr,locStr);
                // NOTE(review): chrIndex/posIndex can be -1 for unexpected loci,
                // which would throw ArrayIndexOutOfBoundsException here.
                syzyPowerTable[chrIndex][posIndex] = Double.valueOf(syzyPowStr)/100.0;
                syzySanityCheckTable[chrIndex][posIndex] = "chrm" + chrmNoStr +":" + locStr;
                if( (numreads % 1000) == 0){
                    System.out.println(numreads + " reads from Syzygy file");
                }
            }
        } catch (IOException e) {
            // do nothing
            System.out.println("IOException caught");
        }
        System.out.println("Table generated.");
    }

    /**
     * Maps a chromosome (and position, for the split chr3 region) to a row of
     * the power table. Hard-coded to the pools this walker targets; returns -1
     * for anything else.
     */
    public int getTableChromIndex(String chromNo, String locNo) {
        switch (Integer.valueOf(chromNo)) {
            case 1: return 0;
            case 2: return 1;
            case 3:
                // chr3 is covered by two disjoint regions, split at 63Mb.
                if ( Integer.valueOf(locNo) < 63000000 )
                    return 2;
                else
                    return 3;
            case 7: return 4;
            case 10: return 5;
            case 11: return 6;
            case 12: return 7;
            default:
                System.out.println(chromNo + " " + locNo);
                return -1;
        }
    }

    /** Convenience overload taking a "chr<N>:<pos>" locus string. */
    public int getTableChromIndex(String chromAndPos) {
        StringTokenizer tok = new StringTokenizer(chromAndPos,":");
        return getTableChromIndex(tok.nextToken().substring(3),tok.nextToken());
    }

    /** Convenience overload taking a "chr<N>:<pos>" locus string. */
    public int getTablePosIndex(String chromAndPos) {
        StringTokenizer tok = new StringTokenizer(chromAndPos,":");
        return getTablePosIndex(tok.nextToken().substring(3),tok.nextToken());
    }

    /**
     * Maps a position to its column in the power table by subtracting the
     * hard-coded start coordinate of the covered region; -1 when off-table.
     */
    public int getTablePosIndex(String chromNo, String locNo) {
        switch ( getTableChromIndex(chromNo, locNo) ) {
            case 0: return Integer.valueOf(locNo) - 120065274;
            case 1: return Integer.valueOf(locNo) - 43311321;
            case 2: return Integer.valueOf(locNo) - 12157091;
            case 3: return Integer.valueOf(locNo) - 64000000;
            case 4: return Integer.valueOf(locNo) - 27838907;
            case 5: return Integer.valueOf(locNo) - 12003199;
            case 6: return Integer.valueOf(locNo) - 92342389;
            case 7: return Integer.valueOf(locNo) - 69809219;
            default:
                System.out.println(chromNo + " " + locNo);
                return -1;
        }
    }
}

View File

@ -1,232 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ListUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.playground.utils.PoolUtils;
import org.broadinstitute.sting.playground.utils.ReadOffsetQuad;
import net.sf.samtools.SAMRecord;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Aug 18, 2009
* Time: 11:57:07 AM
* To change this template use File | Settings | File Templates.
*/
@By(DataSource.REFERENCE)
/**
 * Per-locus walker over a pooled sample: splits reads by strand, optionally
 * filters by base quality, and reports coverage plus the theoretical (or
 * bootstrapped) power to detect a single SNP allele in the pool at the
 * configured LOD threshold. Reduce accumulates total forward/reverse coverage.
 */
public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>, Pair<Long, Long>> {
    @Argument(fullName="suppressLocusPrinting",doc="Suppress printing",required=false)
    public boolean suppress_printing = false;
    @Argument(fullName="poolSize", shortName="ps", doc="Number of individuals in pool", required=true)
    public int num_individuals = 0;
    @Argument(fullName="bootStrap", shortName="bs", doc="Use a bootstrap method", required=false)
    public boolean useBootstrap = false;
    @Argument(fullName="lodThreshold", shortName="lt", doc="Threshold for LOD score for calls", required=false)
    public double lodThreshold = 3.0;
    @Argument(fullName="minQScore", shortName="qm", doc="Threshold for the minimum quality score, filter out bases below",required=false)
    public byte minQScore = 0;
    @Argument(fullName="outputUnthresholdedCoverage", shortName = "uc", doc="Print out the total coverage as well as the coverage above the quality threshold", required = false)
    public boolean outputUnthreshCvg = false;

    // Number of coin-toss resamples used when --bootStrap is enabled.
    private static final int BOOTSTRAP_ITERATIONS = 300;

    public void initialize() {
        if(num_individuals <= 0)
            throw new IllegalArgumentException("Positive nonzero parameter expected for poolSize");
    }

    /** Prints the column header (unless suppressed) and starts coverage totals at 0. */
    public Pair<Long,Long> reduceInit() {
        if ( ! suppress_printing ) { // print header
            out.printf("%s%n",createHeaderString());
        }
        return new Pair(0l,0l);
    }

    /** Accumulates (forward, reverse) coverage totals across loci. */
    public Pair<Long,Long> reduce(Pair<Integer,Integer> newCvg, Pair<Long,Long> prevCvg) {
        return new Pair(prevCvg.first + newCvg.first, prevCvg.second + newCvg.second);
    }

    /**
     * Filters the pileup by quality (if requested), splits reads by strand,
     * prints the per-locus coverage/power line, and returns the directional
     * coverage pair for reduction.
     */
    public Pair<Integer,Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        AlignmentContext filteredContext;
        if (this.getMinQualityScore() <= 0) {
            filteredContext = context;
        } else {
            Pair<List<SAMRecord>,List<Integer>> readsFilteredByQuality = filterByQuality(context.getReads(),context.getOffsets(), this.getMinQualityScore());
            filteredContext = new AlignmentContext(context.getLocation(),readsFilteredByQuality.getFirst(),readsFilteredByQuality.getSecond());
        }
        ReadOffsetQuad splitReads = PoolUtils.splitReadsByReadDirection(filteredContext.getReads(), filteredContext.getOffsets());
        // Repackage the quad into the nested-pair shape the power helpers expect:
        // ((fwdReads, revReads), (fwdOffsets, revOffsets)).
        Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> readsByDirection = new Pair(new Pair(splitReads.getFirstReads(),splitReads.getSecondReads()),new Pair(splitReads.getFirstOffsets(),splitReads.getSecondOffsets()));
        if ( ! suppress_printing && ! outputUnthreshCvg ) {
            Pair<double[],byte[]> powers = calculatePower(readsByDirection, useBootstrap, filteredContext);
            out.printf("%s %d %d %d %d %d %d %f %f %f%n", filteredContext.getLocation(), readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
                    filteredContext.getReads().size(), powers.getSecond()[0], powers.getSecond()[1], powers.getSecond()[2],
                    powers.getFirst()[0], powers.getFirst()[1], powers.getFirst()[2]);
        } else if (! suppress_printing && outputUnthreshCvg) {
            Pair<double[],byte[]> powers = calculatePower(readsByDirection, useBootstrap, filteredContext);
            // Also split the UNfiltered reads so raw coverage can be printed alongside.
            ReadOffsetQuad readsByDir = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
            Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> ufReadsByDirection = new Pair(new Pair(readsByDir.getFirstReads(), readsByDir.getSecondReads()), new Pair(readsByDir.getFirstOffsets(), readsByDir.getSecondOffsets()));
            out.printf("%s %d %d %d %d %d %d %d %d %d %f %f %f%n", filteredContext.getLocation(), ufReadsByDirection.getFirst().getFirst().size(),
                    ufReadsByDirection.getFirst().getSecond().size(), context.getReads().size(),
                    readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
                    filteredContext.getReads().size(), powers.getSecond()[0], powers.getSecond()[1], powers.getSecond()[2],
                    powers.getFirst()[0], powers.getFirst()[1], powers.getFirst()[2]);
        }
        return new Pair(readsByDirection.getFirst().getFirst().size(),readsByDirection.getFirst().getSecond().size());
    }

    // helper methods

    /** Dispatches to the bootstrap or theoretical power calculation. */
    public Pair<double[],byte[]> calculatePower(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, boolean bootStrap, AlignmentContext context) {
        Pair<double[],byte[]> powerAndMedianQ;
        if( bootStrap ) {
            powerAndMedianQ = calculateDirectionalPowersByBootstrap(dirReads,lodThreshold,context);
        } else {
            powerAndMedianQ = calculateDirectionalPowersTheoretical(dirReads,lodThreshold,context);
        }
        return powerAndMedianQ;
    }

    /** @return ([fwd, rev, combined] power, [fwd, rev, combined] median Q). */
    public Pair<double[],byte[]> calculateDirectionalPowersTheoretical(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, double thresh, AlignmentContext context) {
        byte[] medQs = getMedianQScores(dirReads, context);
        double powerForward = calculatePowerTheoretical(dirReads.getFirst().getFirst(),dirReads.getSecond().getFirst(),medQs[0],thresh);
        double powerReverse = calculatePowerTheoretical(dirReads.getFirst().getSecond(), dirReads.getSecond().getSecond(),medQs[1],thresh);
        double powerCombined = calculatePowerTheoretical(context.getReads(),context.getOffsets(),medQs[2],thresh);
        double [] powers = {powerForward,powerReverse,powerCombined};
        return new Pair(powers, medQs);
    }

    /** @return median base qualities as [forward, reverse, combined]. */
    public byte[] getMedianQScores(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, AlignmentContext context) {
        byte medQForward = getMedianQualityScore(dirReads.getFirst().getFirst(),dirReads.getSecond().getFirst());
        byte medQReverse = getMedianQualityScore(dirReads.getFirst().getSecond(), dirReads.getSecond().getSecond());
        byte medQCombined = getMedianQualityScore(context.getReads(),context.getOffsets());
        byte [] medQs = {medQForward, medQReverse, medQCombined};
        return medQs;
    }

    public byte getMedianQualityScore(List<SAMRecord> reads, List<Integer> offsets) {
        return ListUtils.getQScoreMedian(reads,offsets);
    }

    /**
     * Theoretical power of a binomial log-likelihood-ratio test to call a
     * single-allele SNP at this depth, assuming every base errs with the
     * probability implied by the median quality score.
     */
    public double calculatePowerTheoretical(List<SAMRecord> reads, List<Integer> offsets, byte medQ, double thresh) {
        double p_error = QualityUtils.qualToErrorProb(medQ);
        int depth = reads.size();
        if(depth <= 0) {
            return 0; // no reads, no power
        }
        double snpProp = this.getSNPProportion(1);
        // variable names from here on out come from the mathematics of a log likelihood test
        // with binomial probabilities, of observing k bases consistent with the same SNP
        // given that SNP occurs in one of the alleles, versus it does not occur.
        // Numerator and denominator will each have a term raised to the kth power
        // and a term raised to the (depth - kth) (or d-kth) power. Thus the names.
        // Taking a log then brings those powers out as coefficients, where they can be solved for.
        double kterm_num = Math.log10( snpProp * (1 - p_error) + (1 - snpProp) * (p_error/3) );
        double kterm_denom = Math.log10( p_error/3 );
        double dkterm_num = Math.log10( snpProp * (p_error/3) + (1 - snpProp) * (1 - p_error) );
        double dkterm_denom = Math.log10( 1 - p_error);
        int kaccept = (int) Math.ceil( ( thresh - ( (double) depth ) * ( dkterm_num - dkterm_denom ) )/( kterm_num - kterm_denom- dkterm_num + dkterm_denom ) );
        // we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs
        // we can optimize this by checking to see which sum is smaller
        double pow = 0; // NOTE(review): unused local; both branches below return directly
        if ( depth - kaccept < kaccept ) {// kaccept > depth/2 - calculate power as P[hits between kaccept and depth]
            return MathUtils.cumBinomialProbLog(kaccept, depth, depth, snpProp);
        } else { // kaccept < depth/2 - calculate power as 1-P[hits between 0 and kaccept]
            return 1-MathUtils.cumBinomialProbLog(0,kaccept,depth,snpProp);
        }
    }

    /** Expected proportion of SNP-bearing chromosomes in a pool of diploids. */
    public double getSNPProportion(int numSNPAlleles) {
        return ((double) numSNPAlleles / (2.0 * num_individuals));
    }

    /** Bootstrap analogue of the theoretical calculation, per direction and combined. */
    public Pair<double[],byte[]> calculateDirectionalPowersByBootstrap(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, double thresh, AlignmentContext context) {
        double powerForward = bootstrapPower(dirReads.getFirst().getFirst(),dirReads.getSecond().getFirst(),thresh);
        double powerReverse = bootstrapPower(dirReads.getFirst().getSecond(),dirReads.getSecond().getSecond(),thresh);
        double powerCombined = bootstrapPower(context.getReads(),context.getOffsets(),thresh);
        double[] powers = {powerForward, powerReverse, powerCombined};
        return new Pair(powers,getMedianQScores(dirReads,context));
    }

    /**
     * Monte-Carlo power estimate: repeatedly partitions reads into SNP/ref by
     * coin toss at the expected SNP proportion and counts the fraction of
     * resamples whose log-likelihood exceeds the threshold. Non-deterministic
     * (uses Math.random() via coinTossPartition).
     */
    public double bootstrapPower(List<SAMRecord> reads, List<Integer> offsets, double thresh) {
        double power = 0.0;
        for ( int boot = 0; boot < BOOTSTRAP_ITERATIONS; boot++) {
            Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> snpReadsAndRefReads = coinTossPartition(reads,offsets,this.getSNPProportion(1));
            if( PoolUtils.calculateLogLikelihoodOfSample(snpReadsAndRefReads, num_individuals) > thresh) {
                power += 1.0/BOOTSTRAP_ITERATIONS;
            }
        }
        return power;
    }

    /** Header matching the column layout of map()'s printf (threshold-dependent). */
    public String createHeaderString() {
        String headString;
        if(! outputUnthreshCvg ) {
            headString = "Chrom:Pos ForwardCoverage ReverseCoverage CombinedCoverage ForwardMedianQ ReverseMedianQ CombinedMedianQ PowerForward PowerReverse PowerCombined";
        } else {
            headString = "Chrom:Pos ForwardCoverage ReverseCoverage CombinedCoverage ForwardCoverageAboveThreshold ReverseCoverageAboveThreshold CombinedCoverageAboveThreshold ForwardMedianQ ReverseMedianQ CombinedMedianQ PowerForward PowerReverse PowerCombined";
        }
        return headString;
    }

    public byte getMinQualityScore() {
        return minQScore;
    }

    /** Delegates quality thresholding of the pileup to PoolUtils. */
    public Pair<List<SAMRecord>,List<Integer>> filterByQuality(List<SAMRecord> reads, List<Integer> offsets, byte qMin) {
        return PoolUtils.thresholdReadsByQuality(reads,offsets,qMin);
    }

    // class methods

    /**
     * Randomly partitions (reads, offsets) into (snp, ref) groups, assigning
     * each read to the SNP group with probability snpProb. Returns null when
     * either input list is null.
     */
    public static Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> coinTossPartition(List<SAMRecord> reads, List<Integer> offsets, double snpProb) {
        Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> partitionedReads;
        if( reads == null || offsets == null ) {
            partitionedReads = null;
        } else {
            ArrayList<SAMRecord> snpReads = new ArrayList();
            ArrayList<SAMRecord> refReads = new ArrayList();
            ArrayList<Integer> snpOff = new ArrayList();
            ArrayList<Integer> refOff = new ArrayList();
            for(int readNo = 0; readNo < reads.size(); readNo++) {
                if ( Math.random() < snpProb) {
                    snpReads.add(reads.get(readNo));
                    snpOff.add(offsets.get(readNo));
                } else {
                    refReads.add(reads.get(readNo));
                    refOff.add(offsets.get(readNo));
                }
            }
            partitionedReads= new Pair(new Pair(snpReads,refReads), new Pair(snpOff, refOff));
        }
        return partitionedReads;
    }
}

View File

@ -1,262 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
import org.broadinstitute.sting.playground.utils.SQuad;
import org.broadinstitute.sting.playground.utils.ReadOffsetQuad;
import org.broadinstitute.sting.playground.utils.PoolUtils;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.By;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
import net.sf.samtools.SAMRecord;
import java.io.PrintStream;
import java.io.FileNotFoundException;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: Ghost
* Date: Sep 12, 2009
* Time: 12:20:42 PM
* To change this template use File | Settings | File Templates.
*/
@By(DataSource.REFERENCE)
public class PowerAndCoverageWalker extends LocusWalker<SQuad<Integer>, SQuad<Long>> {
@Argument(fullName="suppressLocusPrinting", doc="Walker does not output locus data to any file. Only total coverage information is given at the end", required=false)
public boolean suppressPrinting = false;
@Argument(fullName="outputFile", shortName="of", doc="Locus information is printed to this file instead of standard out", required = false)
public String outputFile = null;
@Argument(fullName="bootStrapIterations", shortName="bs", doc="Use a bootstrap method with this many iterations", required=false)
public int bootstrapIterations = 0;
@Argument(fullName="lodThreshold", shortName="lod", doc="Threshold for log likelihood ratio to be called a SNP. Defaults to 3.0", required = false)
public double lodThresh = 3.0;
@Argument(fullName="minimumQScore", shortName="qm", doc="Use bases whose phred (Q) score meets or exceeds this number. Defaults to 0", required = false)
public byte minQ = 0;
@Argument(fullName="aboveQScoreOutputOnly", doc="Output only the information for reads that exceed the q-score threshold", required=false)
public boolean aboveQScoreOutputOnly = false;
@Argument(fullName="outputRawStatistics", shortName="rs", doc="Walker outputs the median quality score and power for the un-thresholded reads as well as those that are thresholded", required=false)
public boolean outputRawStatistics = false;
@Argument(fullName="poolSize", shortName="ps", doc="Number of individuals in the pool", required = true)
public int numIndividuals = 0;
protected PrintStream outputWriter = null;
public void initialize() {
if(numIndividuals <= 0) {
throw new StingException("Pool size must be greater than 1. You input "+numIndividuals);
}
if( outputFile != null ) {
try {
outputWriter = new PrintStream(outputFile);
} catch (FileNotFoundException e) {
String errMsg = "The filepath input as an argument to -of, "+outputFile+" was a directory or could not be found or created.";
throw new StingException(errMsg, e);
}
outputWriter.printf("%s%n", generateHeaderString());
} else if( ! suppressPrinting ) {
out.printf("%s%n", generateHeaderString());
}
}
public SQuad<Long> reduceInit() {
return new SQuad<Long>(0l,0l,0l,0l);
}
/* REDUCE
* Add up filtered and unfiltered coverage in the forward
* and reverse directions.
*/
public SQuad<Long> reduce( SQuad<Integer> newCvg, SQuad<Long> oldCvg) {
return new SQuad<Long>(newCvg.getFirst() + oldCvg.getFirst(), newCvg.getSecond() + oldCvg.getSecond(),
newCvg.getThird() + oldCvg.getThird(), newCvg.getFourth() + oldCvg.getFourth());
}
/* MAP
* Get the coverage in each direction above and below threshold and calculate the estimated power
* if printing is not suppressed print this information to the file
* pass coverage information on to Reduce
*/
public SQuad<Integer> map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
ReadOffsetQuad readsByDirection = splitReadsByDirection(context.getReads(),context.getOffsets());
ReadOffsetQuad readsByDirectionAboveThresh;
if(minQ > 0) {
readsByDirectionAboveThresh = thresholdReadsByQuality(readsByDirection);
} else {
readsByDirectionAboveThresh = readsByDirection;
}
SQuad<Byte> medianQScoresAboveThresh = getMedianQScores(readsByDirectionAboveThresh);
SQuad<Double> powerAboveThresh = calculatePower(readsByDirectionAboveThresh, medianQScoresAboveThresh);
SQuad<Byte> medianQScoresUnthresh;
SQuad<Double> powerUnthresh;
if(! outputRawStatistics) {
medianQScoresUnthresh = null;
powerUnthresh = null;
} else {
medianQScoresUnthresh = getMedianQScores(readsByDirection);
powerUnthresh= calculatePower(readsByDirection, medianQScoresUnthresh);
}
if ( ! suppressPrinting && outputWriter == null) {
out.printf("%s%n",outputString(readsByDirection,readsByDirectionAboveThresh,powerAboveThresh, powerUnthresh,
medianQScoresAboveThresh, medianQScoresUnthresh, context.getLocation()));
} else if (! suppressPrinting && outputWriter != null) {
outputWriter.printf("%s%n",outputString(readsByDirection,readsByDirectionAboveThresh,powerAboveThresh, powerUnthresh,
medianQScoresAboveThresh, medianQScoresUnthresh, context.getLocation()));
}
return new SQuad<Integer>(readsByDirection.numReadsFirst(), readsByDirection.numReadsSecond(),
readsByDirectionAboveThresh.numReadsFirst(), readsByDirectionAboveThresh.numReadsSecond());
}
// private helper methods
/*
 * Builds the column-header line for the output file. The column set depends on the
 * printing-related command-line arguments (minQ, aboveQScoreOutputOnly, outputRawStatistics);
 * note that "no threshold" and "above-threshold-only" share the same basic column set.
 */
private String generateHeaderString() {
    final String basicHeader = "Chrom:Pos CoverageFwd CoverageRev CoverageCom MedianQFwd MedianQRev MedianQCom PowerFwd PowerRev PowerCom";
    if (minQ <= 0) {
        return basicHeader;
    }
    final String m = Byte.toString(minQ);
    if (! aboveQScoreOutputOnly && ! outputRawStatistics) {
        // Raw coverage plus above-threshold coverage columns.
        return "Chrom:Pos CoverageFwd CoverageRev CoverageCom CoverageAboveQ"+m+"Fwd CoverageAboveQ"+m+"Rev CoverageAboveQ"+m+"Com MedianQFwd MedianQRev MedianQCom PowerFwd PowerRev PowerCom";
    }
    if (! outputRawStatistics) {
        // Above-threshold statistics only; same column names as the unthresholded case.
        return basicHeader;
    }
    // Full output: raw and above-threshold coverage, medians, and power.
    return "Chrom:Pos CoverageFwd CoverageRev CoverageCom CoverageAboveQ"+m+"Fwd CoverageAboveQ"+m+"Rev "+
           "CoverageAboveQ"+m+"Com RawMedianQFwd RawMedianQRev RawMedianQCom MedianQAboveQ"+m+"Fwd "+
           "MedianQAboveQ"+m+"Rev MedianQAboveQ"+m+"Com RawPowerFwd RawPowerRev RawPowerCom "+
           "PowerAboveQ"+m+"Fwd PowerAboveQ"+m+"Rev PowerAboveQ"+m+"Com";
}
/*
 * Formats one output line; the branch structure and column order mirror generateHeaderString()
 * and depend on the same printing-related command-line options. rawPow and rawQ may be null
 * unless outputRawStatistics is set -- only the final branch reads them.
 */
private String outputString(ReadOffsetQuad reads, ReadOffsetQuad threshReads, SQuad<Double> pow, SQuad<Double> rawPow, SQuad<Byte> medQ, SQuad<Byte> rawQ, GenomeLoc loc) {
    String outputString;
    if(minQ <= 0) {
        // No quality threshold: raw coverage, medians, and power only.
        outputString = String.format("%s %d %d %d %d %d %d %f %f %f", loc.toString(), reads.numReadsFirst(), reads.numReadsSecond(), reads.numReads(),
                medQ.getFirst(), medQ.getSecond(), medQ.getThird(), pow.getFirst(), pow.getSecond(), pow.getThird());
    } else if (! aboveQScoreOutputOnly && ! outputRawStatistics ){
        // Thresholding active: raw + above-threshold coverage, thresholded medians and power.
        outputString = String.format("%s %d %d %d %d %d %d %d %d %d %f %f %f", loc.toString(), reads.numReadsFirst(), reads.numReadsSecond(), reads.numReads(),
                threshReads.numReadsFirst(), threshReads.numReadsSecond(), threshReads.numReads(), medQ.getFirst(),
                medQ.getSecond(), medQ.getThird(), pow.getFirst(), pow.getSecond(), pow.getThird());
    } else if (! outputRawStatistics ){
        // Above-threshold statistics only.
        outputString = String.format("%s %d %d %d %d %d %d %f %f %f", loc.toString(), threshReads.numReadsFirst(), threshReads.numReadsSecond(),
                threshReads.numReads(), medQ.getFirst(), medQ.getSecond(), medQ.getThird(), pow.getFirst(),
                pow.getSecond(), pow.getThird());
    } else {
        // Full raw + thresholded statistics; this is the only branch that reads rawQ/rawPow.
        outputString = String.format("%s %d %d %d %d %d %d %d %d %d %d %d %d %f %f %f %f %f %f", loc.toString(), reads.numReadsFirst(),
                reads.numReadsSecond(), reads.numReads(), threshReads.numReadsFirst(), threshReads.numReadsSecond(),
                threshReads.numReads(), rawQ.getFirst(), rawQ.getSecond(), rawQ.getThird(), medQ.getFirst(),
                medQ.getSecond(), medQ.getThird(), rawPow.getFirst(), rawPow.getSecond(), rawPow.getThird(),
                pow.getFirst(), pow.getSecond(), pow.getThird());
    }
    return outputString;
}
// Thin wrapper: partitions the reads and their offsets into forward/reverse strand groups.
private ReadOffsetQuad splitReadsByDirection(List<SAMRecord> reads, List<Integer> offsets) {
    return PoolUtils.splitReadsByReadDirection(reads,offsets);
}
/*
 * Applies the base-quality threshold (minQ) to both halves of a ReadOffsetQuad,
 * preserving the forward/reverse split.
 */
private ReadOffsetQuad thresholdReadsByQuality(ReadOffsetQuad readQuad) {
    Pair<List<SAMRecord>, List<Integer>> firstAboveThresh =
            PoolUtils.thresholdReadsByQuality(readQuad.getFirstReadOffsetPair(), minQ);
    Pair<List<SAMRecord>, List<Integer>> secondAboveThresh =
            PoolUtils.thresholdReadsByQuality(readQuad.getSecondReadOffsetPair(), minQ);
    return new ReadOffsetQuad(firstAboveThresh, secondAboveThresh);
}
/*
 * Median base quality for the first group, the second group, and both groups combined.
 * The fourth SQuad slot is unused and set to zero.
 */
private SQuad<Byte> getMedianQScores(ReadOffsetQuad reads) {
    Byte medianFirst    = ListUtils.getQScoreMedian(reads.getFirstReads(),    reads.getFirstOffsets());
    Byte medianSecond   = ListUtils.getQScoreMedian(reads.getSecondReads(),   reads.getSecondOffsets());
    Byte medianCombined = ListUtils.getQScoreMedian(reads.getReadsCombined(), reads.getOffsetsCombined());
    return new SQuad<Byte>(medianFirst, medianSecond, medianCombined, (byte) 0);
}
/*
 * Chooses the power estimator: bootstrap simulation when bootstrapIterations > 0,
 * otherwise the closed-form calculation based on the supplied median qualities.
 */
private SQuad<Double> calculatePower(ReadOffsetQuad readQuad, SQuad<Byte> medianQs) {
    return ( bootstrapIterations > 0 )
            ? bootstrapPowerByDirection(readQuad)
            : theoreticalPowerByDirection(readQuad, medianQs);
}
/*
 * Closed-form power per strand group and for all reads combined; fourth slot unused.
 */
private SQuad<Double> theoreticalPowerByDirection(ReadOffsetQuad readQuad, SQuad<Byte> medianQs) {
    double powerFirst    = theoreticalPower(readQuad.numReadsFirst(),  medianQs.getFirst());
    double powerSecond   = theoreticalPower(readQuad.numReadsSecond(), medianQs.getSecond());
    double powerCombined = theoreticalPower(readQuad.numReads(),       medianQs.getThird());
    return new SQuad<Double>(powerFirst, powerSecond, powerCombined, 0.0);
}
/*
 * Bootstrap-estimated power per strand group and for all reads combined; fourth slot unused.
 */
private SQuad<Double> bootstrapPowerByDirection(ReadOffsetQuad readQuad) {
    double powerFirst    = bootstrapPower(readQuad.getFirstReads(),    readQuad.getFirstOffsets());
    double powerSecond   = bootstrapPower(readQuad.getSecondReads(),   readQuad.getSecondOffsets());
    double powerCombined = bootstrapPower(readQuad.getReadsCombined(), readQuad.getOffsetsCombined());
    return new SQuad<Double>(powerFirst, powerSecond, powerCombined, 0.0);
}
/*
 * Closed-form power to detect a variant at the pool's expected allele proportion
 * (getSNPProportion()) given read depth and a representative base quality q, at LOD
 * threshold lodThresh. Returns 0 for non-positive depth.
 */
private double theoreticalPower(int depth, byte q) {
    double power;
    if( depth <= 0 ) {
        power = 0.0; // no reads -> no power
    } else {
        double p_error = QualityUtils.qualToErrorProb(q);
        double snpProp = getSNPProportion();
        // Per-read log10-likelihood terms: kterm_* for a read supporting the variant base,
        // dkterm_* for a read supporting the reference (num = variant model, denom = ref model).
        double kterm_num = Math.log10( snpProp * (1 - p_error) + (1 - snpProp) * (p_error/3) );
        double kterm_denom = Math.log10( p_error/3 );
        double dkterm_num = Math.log10( snpProp * (p_error/3) + (1 - snpProp) * (1 - p_error) );
        double dkterm_denom = Math.log10( 1 - p_error);
        // Smallest count of variant-supporting reads at which the LOD crosses lodThresh.
        // NOTE(review): kaccept is not clamped to [0, depth] -- presumably
        // MathUtils.cumBinomialProbLog tolerates out-of-range bounds; TODO confirm.
        int kaccept = (int) Math.ceil( ( lodThresh - ( (double) depth ) * ( dkterm_num - dkterm_denom ) ) /
                ( kterm_num - kterm_denom- dkterm_num + dkterm_denom ) );
        // we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs
        // we can optimize this by checking to see which sum is smaller
        if ( depth - kaccept < kaccept ) {// kaccept > depth/2 - calculate power as P[hits between kaccept and depth]
            power = MathUtils.cumBinomialProbLog(kaccept, depth, depth, snpProp);
        } else { // kaccept < depth/2 - calculate power as 1-P[hits between 0 and kaccept]
            // NOTE(review): if cumBinomialProbLog's upper bound is inclusive, this computes
            // 1 - P[0..kaccept] = P[kaccept+1..depth], which disagrees with the branch above
            // (P[kaccept..depth]) by one term -- looks like an off-by-one; verify against
            // MathUtils.cumBinomialProbLog's contract.
            power = 1-MathUtils.cumBinomialProbLog(0,kaccept,depth,snpProp);
        }
    }
    return power;
}
/*
 * Monte-Carlo power estimate: repeatedly coin-toss-partition the reads at the pool's expected
 * allele proportion and count how often the resulting sample's LOD clears lodThresh.
 * Nondeterministic (coinTossPartition draws random numbers).
 */
private double bootstrapPower(List<SAMRecord> reads, List<Integer> offsets) {
    final double snpProp = getSNPProportion();
    final double perSuccess = 1.0/bootstrapIterations; // weight of one successful iteration
    double power = 0.0;
    for ( int iteration = 0; iteration < bootstrapIterations; iteration++ ) {
        ReadOffsetQuad partition = PoolUtils.coinTossPartition(reads, offsets, snpProp);
        if ( PoolUtils.calculateLogLikelihoodOfSample(partition, snpProp) >= lodThresh ) {
            // Accumulate incrementally (rather than successes/iterations) to keep the exact
            // floating-point behavior of the original implementation.
            power += perSuccess;
        }
    }
    return power;
}
/*
 * Expected proportion of chromosomes carrying a single variant allele in a pool of
 * numIndividuals diploid individuals (one variant chromosome out of 2N).
 */
private double getSNPProportion() {
    double chromosomesInPool = 2.0*numIndividuals;
    return 1/chromosomesInPool;
}
}

View File

@ -44,6 +44,12 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
@Argument(fullName="minimumMappingQuality", shortName="mmq", doc="Only use reads above this mapping quality in the power calculation", required=false)
int minMappingQuality = -1;
@Argument(fullName="ignoreForwardReads",doc="Use the forward reads at a site. Defaults to true.", required = false)
boolean ignoreForwardReads = true;
@Argument(fullName="ignoreReverseReads",doc="Use the reverse reads at a site. Defaults to true.", required = false)
boolean ignoreReverseReads = true;
public void initialize() {
if ( alleleFreq < 1 ) {
String err = "Allele frequency (-af) must be greater than or equal to one.";
@ -64,6 +70,16 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
String output = String.format("%s", context.getLocation().toString());
// threshold reads if necessary
if ( ignoreForwardReads && ignoreReverseReads) {
throw new StingException("User has elected to ignore both forward and reverse reads. Power is zero.");
}
else if ( ! ignoreForwardReads && ignoreReverseReads ) {
org.broadinstitute.sting.playground.gatk.walkers.poolseq.ReadOffsetQuad rq = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
context = new AlignmentContext(context.getLocation(),rq.getFirstReads(),rq.getFirstOffsets());
} else if ( ignoreForwardReads && ! ignoreReverseReads ) {
org.broadinstitute.sting.playground.gatk.walkers.poolseq.ReadOffsetQuad rq = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
context = new AlignmentContext(context.getLocation(),rq.getSecondReads(),rq.getSecondOffsets());
}
if ( minQ > 0 ) {
Pair<List<SAMRecord>, List<Integer>> thresh = PoolUtils.thresholdReadsByQuality(context.getReads(),context.getOffsets(),minQ);

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.utils;
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
import org.broadinstitute.sting.utils.Pair;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.utils;
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLoc;

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.playground.utils;
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
/**
* Created by IntelliJ IDEA.

View File

@ -8,6 +8,7 @@ import java.util.ArrayList;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.playground.gatk.walkers.poolseq.ReadOffsetQuad;
/**
* Created by IntelliJ IDEA.
@ -21,39 +22,6 @@ public class PoolUtils {
// Static utility class -- private constructor prevents instantiation.
private PoolUtils() {
}
// Indices used to address base-indexed parallel arrays (see splitReadsByBase / getBaseOffset).
public static final int BASE_A_OFFSET = 0;
public static final int BASE_C_OFFSET = 1;
public static final int BASE_G_OFFSET = 2;
public static final int BASE_T_OFFSET = 3;
// Number of regular bases -- the length of any base-indexed array.
public static final int BASE_INDEXED_ARRAY_SIZE = 4;
/**
 * Splits a pileup into reads showing a regular base (A/C/G/T) at their offset versus reads
 * showing anything else, and returns one of the two (reads, offsets) pairs.
 *
 * @param returnBases when true return the regular-base pair, otherwise the non-regular pair
 */
public static Pair<List<SAMRecord>, List<Integer>> splitReadsByIndels( List<SAMRecord> reads, List<Integer> offsets, boolean returnBases ) {
    List<SAMRecord> regularReads   = new ArrayList<SAMRecord>();
    List<Integer>   regularOffsets = new ArrayList<Integer>();
    List<SAMRecord> otherReads     = new ArrayList<SAMRecord>();
    List<Integer>   otherOffsets   = new ArrayList<Integer>();
    for ( int idx = 0; idx < reads.size(); idx++ ) {
        SAMRecord record = reads.get(idx);
        int offset = offsets.get(idx);
        // Classify by the base this read shows at its pileup offset.
        if ( BaseUtils.isRegularBase( (char) record.getReadBases()[offset] ) ) {
            regularReads.add(record);
            regularOffsets.add(offset);
        } else {
            otherReads.add(record);
            otherOffsets.add(offset);
        }
    }
    if (returnBases) {
        return new Pair<List<SAMRecord>,List<Integer>>(regularReads,regularOffsets);
    }
    return new Pair<List<SAMRecord>,List<Integer>>(otherReads,otherOffsets);
}
public static ReadOffsetQuad splitReadsByReadDirection(List<SAMRecord> reads, List<Integer> offsets) {
ArrayList<SAMRecord> forwardReads;
ArrayList<SAMRecord> reverseReads;
@ -66,10 +34,10 @@ public class PoolUtils {
forwardOffsets = null;
reverseOffsets = null;
} else {
forwardReads = new ArrayList();
reverseReads = new ArrayList();
forwardOffsets = new ArrayList();
reverseOffsets = new ArrayList();
forwardReads = new ArrayList<SAMRecord>();
reverseReads = new ArrayList<SAMRecord>();
forwardOffsets = new ArrayList<Integer>();
reverseOffsets = new ArrayList<Integer>();
for (int readNo = 0; readNo < reads.size(); readNo++) {
if (reads.get(readNo).getReadNegativeStrandFlag()) {
@ -85,44 +53,6 @@ public class PoolUtils {
return new ReadOffsetQuad(forwardReads,forwardOffsets,reverseReads,reverseOffsets);
}
/**
 * Splits reads (and their pileup offsets) into four parallel lists indexed by the base each
 * read shows at its offset (BASE_A_OFFSET..BASE_T_OFFSET). Returns a Pair of null arrays
 * when reads is null. Reads showing a non-ACGT byte are currently dropped (no indel
 * awareness yet -- see TODO below).
 */
public static Pair<List<SAMRecord>[], List<Integer>[]> splitReadsByBase(List<SAMRecord> reads, List<Integer> offsets) {
    ArrayList<SAMRecord>[] readsByBase;
    ArrayList<Integer>[] offsetsByBase;
    if (reads == null) {
        readsByBase = null;
        offsetsByBase = null;
    } else {
        readsByBase = new ArrayList[BASE_INDEXED_ARRAY_SIZE];
        offsetsByBase = new ArrayList[BASE_INDEXED_ARRAY_SIZE];
        // Bug fix: the array slots were never initialized, so every .add() below threw a
        // NullPointerException on the first read processed.
        for (int base = 0; base < BASE_INDEXED_ARRAY_SIZE; base++) {
            readsByBase[base] = new ArrayList<SAMRecord>();
            offsetsByBase[base] = new ArrayList<Integer>();
        }
        for (int readNum = 0; readNum < reads.size(); readNum++) {
            switch (reads.get(readNum).getReadBases()[offsets.get(readNum)]) {
                case 'A':
                case 'a':
                    readsByBase[BASE_A_OFFSET].add(reads.get(readNum));
                    offsetsByBase[BASE_A_OFFSET].add(offsets.get(readNum));
                    break;
                case 'C':
                case 'c':
                    readsByBase[BASE_C_OFFSET].add(reads.get(readNum));
                    offsetsByBase[BASE_C_OFFSET].add(offsets.get(readNum));
                    break;
                case 'G':
                case 'g':
                    readsByBase[BASE_G_OFFSET].add(reads.get(readNum));
                    offsetsByBase[BASE_G_OFFSET].add(offsets.get(readNum));
                    break;
                case 'T':
                case 't':
                    readsByBase[BASE_T_OFFSET].add(reads.get(readNum));
                    offsetsByBase[BASE_T_OFFSET].add(offsets.get(readNum));
                    break;
                default:
                    break; // TODO: INDEL AWARENESS
            }
        }
    }
    // Parameterize the Pair (was a raw `new Pair(...)`, which generated an unchecked warning).
    return new Pair<List<SAMRecord>[], List<Integer>[]>(readsByBase, offsetsByBase);
}
public static Pair<List<SAMRecord>, List<Integer>> thresholdReadsByQuality(List<SAMRecord> reads, List<Integer> offsets, byte qThresh) {
List<SAMRecord> threshReads;
@ -134,8 +64,8 @@ public class PoolUtils {
threshReads = reads;
threshOffsets = offsets;
} else {
threshReads = new ArrayList();
threshOffsets = new ArrayList();
threshReads = new ArrayList<SAMRecord>();
threshOffsets = new ArrayList<Integer>();
for (int readNo = 0; readNo < reads.size(); readNo++) {
if (reads.get(readNo).getBaseQualities()[offsets.get(readNo)] >= qThresh) {
@ -172,109 +102,5 @@ public class PoolUtils {
return new Pair<List<SAMRecord>,List<Integer>>(goodMapReads,goodMapOffsets);
}
// Convenience overload: unpacks a (reads, offsets) Pair and delegates to the list version.
public static Pair<List<SAMRecord>,List<Integer>> thresholdReadsByQuality(Pair<List<SAMRecord>,List<Integer>> readPair, byte qThresh) {
    return thresholdReadsByQuality(readPair.getFirst(),readPair.getSecond(),qThresh);
}
/**
 * Maps a base character (either case) to its index in base-indexed arrays;
 * returns -1 for any unrecognized character.
 */
public static int getBaseOffset(char base) {
    switch (base) {
        case 'A': case 'a': return getBaseAOffset();
        case 'C': case 'c': return getBaseCOffset();
        case 'G': case 'g': return getBaseGOffset();
        case 'T': case 't': return getBaseTOffset();
        default:            return -1;
    }
    //TODO: indel offsets
}
// Accessors for the base-indexed array positions; kept as methods so callers need not
// reference the BASE_*_OFFSET constants directly.
public static int getBaseAOffset() {
    return BASE_A_OFFSET;
}
public static int getBaseCOffset() {
    return BASE_C_OFFSET;
}
public static int getBaseGOffset() {
    return BASE_G_OFFSET;
}
public static int getBaseTOffset() {
    return BASE_T_OFFSET;
}
/**
 * Returns the base quality of each read at its pileup offset, in the same order as the
 * input lists (reads and offsets are parallel).
 */
public static List<Byte> getReadBaseQualities(List<SAMRecord> reads, List<Integer> offsets) {
    final int count = reads.size();
    List<Byte> qualities = new ArrayList<Byte>(count);
    for (int idx = 0; idx < count; idx++) {
        qualities.add(reads.get(idx).getBaseQualities()[offsets.get(idx)]);
    }
    return qualities;
}
/*
 * Log10-odds score for a sample already partitioned into variant-supporting reads (first
 * pair of the quad) and reference-supporting reads (second pair), combining the two
 * per-hypothesis log-probability sums from qListToSumLogProbabilities.
 * NOTE(review): denom is passed as 1/alleleFreq here but as 2.0*nIndivids in the overload
 * below -- the two agree only when alleleFreq == 1/(2*nIndivids); verify callers.
 */
public static double calculateLogLikelihoodOfSample(ReadOffsetQuad readsBySupport, double alleleFreq) {
    List<Byte> qSNP = getReadBaseQualities(readsBySupport.getFirstReads(), readsBySupport.getFirstOffsets());
    List<Byte> qRef = getReadBaseQualities(readsBySupport.getSecondReads(), readsBySupport.getSecondOffsets());
    Pair<Double,Double> logsumSNP = qListToSumLogProbabilities(true, qSNP, 1/alleleFreq);
    Pair<Double,Double> logsumRef = qListToSumLogProbabilities(false, qRef, 1/alleleFreq);
    return 0.0 - logsumSNP.getFirst() - logsumRef.getFirst() + logsumSNP.getSecond() + logsumRef.getSecond();
}
/*
 * Overload taking ((snpReads, refReads), (snpOffsets, refOffsets)) plus the number of pooled
 * individuals; passes 2.0*nIndivids as the denominator (one variant chromosome out of 2N).
 * Same score formula as the ReadOffsetQuad overload (which uses getFirst()/getSecond()
 * instead of direct field access).
 */
public static double calculateLogLikelihoodOfSample(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> snpReadsRefReads, int nIndivids) {
    List<Byte> qListSnps = getReadBaseQualities(snpReadsRefReads.getFirst().getFirst(),snpReadsRefReads.getSecond().getFirst());
    List<Byte> qListRefs = getReadBaseQualities(snpReadsRefReads.getFirst().getSecond(),snpReadsRefReads.getSecond().getSecond());
    Pair<Double,Double> logsumSNP = qListToSumLogProbabilities(true,qListSnps, 2.0*nIndivids);
    Pair<Double,Double> logsumRef = qListToSumLogProbabilities(false,qListRefs, 2.0*nIndivids);
    return 0.0 - logsumSNP.first - logsumRef.first + logsumSNP.second + logsumRef.second;
}
/*
 * For a list of base qualities, returns the summed log10 probabilities of those observations
 * under two hypotheses: first = the pool carries the variant allele at proportion 1/denom,
 * second = the pool is all reference. listRepresentsSNPObservations selects whether the
 * observed bases agree with the variant or the reference call (an "error" is then observing
 * the opposite).
 */
public static Pair<Double,Double> qListToSumLogProbabilities(boolean listRepresentsSNPObservations, List<Byte> qList, double denom)
{
    double logProbObserveXAndSNPTrue = 0; // note "error" for SNP is observing a ref
    double logProbObserveXAndRefTrue = 0;// and "error" for ref is observing a SNP
    for (byte qual : qList) {
        // Phred quality -> per-base error probability.
        double p_err = QualityUtils.qualToErrorProb(qual);
        if (listRepresentsSNPObservations) {
            logProbObserveXAndSNPTrue += Math.log10((1 - p_err) / denom +((denom - 1)*p_err) / denom);
            logProbObserveXAndRefTrue += Math.log10(p_err);
        } else {
            logProbObserveXAndSNPTrue += Math.log10((denom - 1) * (1 - p_err)/denom + p_err/denom);
            logProbObserveXAndRefTrue+= Math.log10(1 -p_err);
        }
    }
    return new Pair<Double,Double>(logProbObserveXAndSNPTrue,logProbObserveXAndRefTrue);
}
/**
 * Randomly assigns each read (with its offset) to a "variant" partition with probability
 * alleleFreq, otherwise to a "reference" partition; used by the bootstrap power estimate.
 * NOTE(review): uses Math.random(), so partitions are not reproducible across runs --
 * consider accepting a seedable java.util.Random from the caller.
 */
public static ReadOffsetQuad coinTossPartition(List<SAMRecord> reads, List<Integer> offsets, double alleleFreq) {
    ArrayList<SAMRecord> snpReads = new ArrayList<SAMRecord>();
    ArrayList<Integer> snpOffsets = new ArrayList<Integer>();
    ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>();
    ArrayList<Integer> refOffsets = new ArrayList<Integer>();
    for( int i = 0; i < reads.size(); i ++ ) {
        if ( Math.random() < alleleFreq ) {
            snpReads.add(reads.get(i));
            snpOffsets.add(offsets.get(i));
        } else {
            refReads.add(reads.get(i));
            refOffsets.add(offsets.get(i));
        }
    }
    return new ReadOffsetQuad(snpReads,snpOffsets,refReads,refOffsets);
}
}

View File

@ -33,6 +33,8 @@ public class ReadBackedPileup extends BasicPileup {
this.offsets = offsets;
}
// Number of reads in this pileup.
public int size() { return reads.size(); }
// Accessors for the backing read and per-read offset lists (returned directly, no copy).
public List<SAMRecord> getReads() { return reads; }
public List<Integer> getOffsets() { return offsets; }

View File

@ -13,8 +13,14 @@ import java.util.Arrays;
* To change this template use File | Settings | File Templates.
*/
public class BaseTransitionTableCalculatorJavaIntegrationTest extends WalkerTest{
// NOTE(review): OUTPUT_MD5 holds the same value as OUTPUT_MD5_STANDARD and looks like a
// leftover from the rename -- confirm nothing references it and remove.
public static final String OUTPUT_MD5 = "856a3124fb078b4d820f935369ec53cc";
// MD5s last computed 10/20 at revision 1877
public static final String OUTPUT_MD5_STANDARD = "856a3124fb078b4d820f935369ec53cc";
public static final String OUTPUT_MD5_3MISMATCHES = "847375e3d40415d237c627a56d163760";
public static final String OUTPUT_MD5_LOWMAPPINGQUALITY = "d7e490168dfef8eb4185b596f55bfaa8";
public static final String OUTPUT_MD5_LOWQUALITYSCORE = "e3f3c3875936b0df7a8899c2ef5d1dd2";
public static final String OUTPUT_MD5_LOWCONFIDENTREFTHRESHOLD = "521a9fa7716ed22550c2ba3fe3409070";
public static final String OUTPUT_MD5_HIGHCONFIDENTREFTHRESHOLD = "8ab6d389fc494881736e9a58126c2f1b";
public static final String OUTPUT_MD5_ALLARGUMENTS = "f45481946d7a5c70078d432b0baff083";
// Shared command-line fixtures for every test below.
public static final String LOCUS = "1:10,000,000-10,200,000";
public static final String BAM_FILE = "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam";
public static final String REFERENCE = "/broad/1KG/reference/human_b36_both.fasta";
@ -22,7 +28,49 @@ public class BaseTransitionTableCalculatorJavaIntegrationTest extends WalkerTest
@Test
public void testBaseTransitionTableCalculatorJava() {
    // Baseline run with default arguments against the standard locus.
    String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE;
    // Fix: `spec` was declared twice (the stale pre-rename OUTPUT_MD5 line was left in
    // alongside the new one, which does not compile); keep the OUTPUT_MD5_STANDARD version.
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_STANDARD));
    executeTest("testBaseTransitionTableCalculatorJava", spec);
}
@Test
// Same locus as the baseline run, but allowing up to 3 mismatches per read.
public void testBaseTransitionTableCalculatorJavaAdditionalMismatches() {
    String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+" --maxNumMismatches 3";
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_3MISMATCHES));
    executeTest("BaseTransitionTableCalculatorJava: Additional Mismatches",spec);
}
@Test
// Lowers the mapping-quality cutoff to 5.
// Fix: method was misspelled "...LowMapingQuality"; test discovery is annotation-driven,
// so the rename breaks no callers.
public void testBaseTransitionCalculatorJavaLowMappingQuality() {
    String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --minMappingQuality 5";
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_LOWMAPPINGQUALITY));
    executeTest("BaseTransitionTableCalculatorJava: Low Mapping Quality",spec);
}
@Test
// Lowers the base-quality cutoff to 5.
public void testBaseTransitionCalculatorJavaLowQualityScore() {
    String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --minQualityScore 5";
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_LOWQUALITYSCORE));
    executeTest("BaseTransitionTableCalculatorJava: Low Quality Score",spec);
}
@Test
// Runs with a below-default confident-reference threshold (2).
public void testBaseTransitionCalculatorJavaLowConfidentRefThreshold() {
    String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --confidentRefThreshold 2";
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_LOWCONFIDENTREFTHRESHOLD));
    executeTest("BaseTransitionTableCalculatorJava: Low Ref Threshold",spec);
}
@Test
// Runs with an above-default confident-reference threshold (8).
public void testBaseTransitionCalculatorJavaHighConfidentRefThreshold() {
    String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --confidentRefThreshold 8";
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_HIGHCONFIDENTREFTHRESHOLD));
    // Fix: label previously said "Low Ref Threshold" -- a copy-paste from the low-threshold test.
    executeTest("BaseTransitionTableCalculatorJava: High Ref Threshold",spec);
}
@Test
// Combines all four non-default options in a single run.
public void testBaseTransitionCalculatorJavaAllArguments() {
    String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --confidentRefThreshold 2 --minQualityScore 5 --minMappingQuality 5 --maxNumMismatches 3";
    WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_ALLARGUMENTS));
    executeTest("BaseTransitionTableCalculatorJava: Low Ref Threshold, Low Q Score, Low Mapping Quality, Additional Mismatches",spec);
}
}

View File

@ -41,8 +41,9 @@ class TransitionTable() {
}
def header(): String = {
return Utils.join("\t", mapEntries((i,j,x) => "P(%c|true=%c)" format (fromKey(j), fromKey(i))).toArray)
}
//return Utils.join("\t", mapEntries((i,j,x) => "P(%c|true=%c)" format (fromKey(j), fromKey(i))).toArray)
return Utils.join("\t", mapEntries((i,j,x) => "[%c|%c]" format (fromKey(j), fromKey(i))).toArray)
}
override def toString(): String = {
return Utils.join("\t", mapEntries((i,j,x) => "%.2f" format x).toArray)
@ -106,7 +107,10 @@ class BaseTransitionTableCalculator extends LocusWalker[Unit,Int] {
nonRefBases.length match {
case 1 =>
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table1)
//if ( goodRead(nonRefRead,offsets.head) ) {
//printf("Including site:%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", nonRefBases, refBases, nonRefRead.format, offsets.head)
//}
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table1)
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, if ( nonRefRead.getReadNegativeStrandFlag ) tableREV else tableFWD )
case 2 =>
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table2)
@ -128,7 +132,7 @@ class BaseTransitionTableCalculator extends LocusWalker[Unit,Int] {
def addRead(nonRefRead: SAMRecord, offset: Integer, nonRefBase: Char, ref: ReferenceContext, table: TransitionTable): Unit = {
if ( goodRead(nonRefRead, offset) ) {
//printf("Including site:%n call=%s%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", call, nonRefBases, refBases, nonRefRead.format, offsets.head)
//printf("Including site:%n call=%s%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", nonRefBases, refBases, nonRefRead.format, offsets.head)
//println(call.getReadDepth, call.getReferencebase, nonRefBases, refBases, nonRefRead.getMappingQuality)
if ( CARE_ABOUT_STRAND && nonRefRead.getReadNegativeStrandFlag() ) {
// it's on the reverse strand
@ -180,7 +184,7 @@ class BaseTransitionTableCalculator extends LocusWalker[Unit,Int] {
if ( call == null )
return false;
return ! call.getFirst().get(0).isVariant() && call.getFirst().get(0).getNegLog10PError() > MIN_LOD
return (! call.getFirst().get(0).isVariant()) && call.getFirst().get(0).getNegLog10PError() > MIN_LOD
}
def reduceInit(): Int = {