Major code refactoring:
@ Pooled utils & power
- Removed two of the power walkers leaving only PowerBelowFrequency, added some additional
flags on PowerBelowFrequency to give it some of the behavior that PowerAndCoverage had
- Removed a number of PoolUtils variables and methods that were used in those walkers or simply
not used
- Removed AnalyzePowerWalker (un-necessary)
- Changed the location of Quad/Squad/ReadOffsetQuad into poolseq
@NQS
- Deleted all walkers but the minimum NQS walker, refactored not to use LocalMapType
@ BaseTransitionTable
- Added a slew of new integration tests for different flaggable and integral parameters
- (Scala) just a System.out that was added and commented out (no actual code change)
- (Java) changed a < to <= and a boolean formula
Chris
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1887 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4be6bb8e92
commit
3b1fabeff0
|
|
@ -201,7 +201,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Referen
|
|||
public int countMismatches(ReadBackedPileup p) {
|
||||
int refM = 0;
|
||||
|
||||
switch (p.getRef()) {
|
||||
switch (Character.toUpperCase(p.getRef())) {
|
||||
case 'A': refM = BasicPileup.countBases(p.getBases()).a;
|
||||
break;
|
||||
case 'C': refM = BasicPileup.countBases(p.getBases()).c;
|
||||
|
|
@ -322,6 +322,7 @@ class BaseTransitionTable {
|
|||
}
|
||||
|
||||
|
||||
|
||||
class ReferenceContextWindow {
|
||||
|
||||
protected int windowSize;
|
||||
|
|
|
|||
|
|
@ -1,34 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Oct 9, 2009
|
||||
* Time: 11:39:34 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
class LocalMapType {
|
||||
|
||||
public AlignmentContext context;
|
||||
public ReferenceContext ref;
|
||||
public RefMetaDataTracker tracker;
|
||||
|
||||
public LocalMapType(AlignmentContext context, ReferenceContext ref, RefMetaDataTracker tracker) {
|
||||
this.context = context;
|
||||
this.ref = ref;
|
||||
this.tracker = tracker;
|
||||
}
|
||||
|
||||
public int numReads() {
|
||||
return context.numReads();
|
||||
}
|
||||
|
||||
public int qScore(int read) {
|
||||
return (int) context.getReads().get(read).getBaseQualities()[context.getOffsets().get(read)];
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,142 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Oct 20, 2009
|
||||
* Time: 9:41:57 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Requires(value={DataSource.REFERENCE}, referenceMetaData = {@RMD(name="dbsnp",type= rodDbSNP.class)})
|
||||
public class MinimumNQSWalker extends LocusWalker<Pair<List<Pair<Integer,Integer>>,List<Pair<Integer,Integer>>>, int[][][]> {
|
||||
private static int MM_OFFSET = 1;
|
||||
private static int MATCH_OFFSET = 0;
|
||||
private static int QSCORE_MAX = 1 + QualityUtils.MAX_REASONABLE_Q_SCORE;
|
||||
@Argument(fullName="windowSize", shortName="ws", doc="Size of the window (in one direction)", required=true)
|
||||
private int winSide = 4;
|
||||
|
||||
public void initialize() {
|
||||
out.printf("%s%n", makeHeader());
|
||||
}
|
||||
|
||||
public int[][][] reduceInit() {
|
||||
int[][][] counts = new int[QSCORE_MAX][QSCORE_MAX][2];
|
||||
for ( int i = 0; i < QSCORE_MAX; i ++ ) {
|
||||
for ( int j = 0; j < QSCORE_MAX; j ++ ) {
|
||||
counts[i][j][1]=0;
|
||||
counts[i][j][0]=0;
|
||||
}
|
||||
}
|
||||
return counts;
|
||||
}
|
||||
|
||||
public int[][][] reduce(Pair<List<Pair<Integer,Integer>>,List<Pair<Integer,Integer>>> map, int[][][] prevReduce) {
|
||||
if ( map != null ) {
|
||||
List<Pair<Integer,Integer>> matchingQualityNQSPairs = map.getFirst();
|
||||
List<Pair<Integer,Integer>> mismatchingQualityNQSPairs = map.getSecond();
|
||||
for ( Pair<Integer,Integer> p : matchingQualityNQSPairs ) {
|
||||
prevReduce[p.getFirst()][p.getSecond()][MATCH_OFFSET] ++;
|
||||
}
|
||||
|
||||
for ( Pair<Integer,Integer> p : mismatchingQualityNQSPairs ) {
|
||||
prevReduce[p.getFirst()][p.getSecond()][MM_OFFSET] ++;
|
||||
}
|
||||
}
|
||||
|
||||
return prevReduce;
|
||||
}
|
||||
|
||||
public Pair<List<Pair<Integer,Integer>>,List<Pair<Integer,Integer>>> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
ArrayList<Pair<Integer,Integer>> matchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
|
||||
ArrayList<Pair<Integer,Integer>> mismatchingQualityNQSPairs = new ArrayList<Pair<Integer,Integer>>();
|
||||
if ( (Variation) tracker.lookup("dbsnp",null) == null ) {
|
||||
for ( int r = 0; r < context.numReads(); r ++ ) {
|
||||
SAMRecord read = context.getReads().get(r);
|
||||
int offset = context.getOffsets().get(r);
|
||||
int quality = read.getBaseQualities()[offset];
|
||||
int NQS = getNghdQ(offset, read);
|
||||
Pair<Integer,Integer> qualityNQSPair = new Pair<Integer,Integer> (quality,NQS);
|
||||
if ( BaseUtils.basesAreEqual(read.getReadBases()[offset], (byte) ref.getBase()) ) {
|
||||
matchingQualityNQSPairs.add(qualityNQSPair);
|
||||
} else {
|
||||
mismatchingQualityNQSPairs.add(qualityNQSPair);
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<List<Pair<Integer,Integer>>,List<Pair<Integer,Integer>>>(matchingQualityNQSPairs,mismatchingQualityNQSPairs);
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void onTraversalDone( int[][][] reduce ) {
|
||||
for ( int qc = 0; qc < QSCORE_MAX; qc ++ ) {
|
||||
for ( int qn = 0; qn < QSCORE_MAX; qn ++ ) {
|
||||
out.printf("%s%n", formatData(reduce[qc][qn],qc,qn));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int getNghdQ(int off, SAMRecord read) {
|
||||
// System.out.println("getNghdQ");
|
||||
byte minQ = Byte.MAX_VALUE;
|
||||
byte[] quals = read.getBaseQualities();
|
||||
int rdlnth = read.getReadLength();
|
||||
int start;
|
||||
int end;
|
||||
if ( off - winSide < 0 ) {
|
||||
start = 0;
|
||||
} else {
|
||||
start = off - winSide;
|
||||
}
|
||||
|
||||
if ( off + winSide > rdlnth ) {
|
||||
end = rdlnth;
|
||||
} else {
|
||||
end = off + winSide;
|
||||
}
|
||||
|
||||
for ( int i = start; i < end; i ++ ) {
|
||||
if ( i != off ) {
|
||||
byte q = quals[i];
|
||||
if ( q < minQ ) {
|
||||
minQ = q;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return minQ;
|
||||
}
|
||||
|
||||
private String makeHeader() {
|
||||
return String.format("%s\t%s\t%s\t%s\t%s\t%s","Reported_Q","Min_Nghd_Q","N_observations","Mm_rate","Empirical_Q","Q_diff");
|
||||
}
|
||||
|
||||
private String formatData( int[] mmArray, int qCenter, int qNghd ) {
|
||||
int counts = mmArray[MM_OFFSET]+mmArray[MATCH_OFFSET];
|
||||
double mismatch = ((double)mmArray[MM_OFFSET]/counts);
|
||||
byte qEmp = QualityUtils.probToQual(1-mismatch);
|
||||
return String.format("%d\t%d\t%d\t%f\t%d\t%d", qCenter, qNghd, counts, mismatch, qEmp,qEmp-qCenter);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,155 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.Recalibration.LocalMapType;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Sep 29, 2009
|
||||
* Time: 1:13:25 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NQSClusteredZScoreWalker extends LocusWalker<LocalMapType, int[][][]> {
|
||||
static final int WIN_SIDE_SIZE = 5;
|
||||
static final int Z_SCORE_MIN = -6;
|
||||
static final int Z_SCORE_MAX = 6;
|
||||
static final int Z_SCORE_MULTIPLIER = 20; // bins are Z_SCORE * (this) rounded to the nearst int
|
||||
static final int MM_OFFSET = 1;
|
||||
static final int MATCH_OFFSET = 0;
|
||||
static final int MAX_Q_SCORE = 2 + QualityUtils.MAX_REASONABLE_Q_SCORE;
|
||||
|
||||
protected int WINDOW;
|
||||
protected int Z_SCORE_RANGE;
|
||||
|
||||
public void initialize() {
|
||||
WINDOW = 2*WIN_SIDE_SIZE+1;
|
||||
Z_SCORE_RANGE = Z_SCORE_MAX - Z_SCORE_MIN;
|
||||
}
|
||||
|
||||
public int[][][] reduceInit() {
|
||||
int[][][] q = new int[Z_SCORE_RANGE*Z_SCORE_MULTIPLIER+1][MAX_Q_SCORE][2];
|
||||
for ( int i = 0; i < Z_SCORE_RANGE*Z_SCORE_MULTIPLIER+1; i ++ ) {
|
||||
for ( int j = 0; j < MAX_Q_SCORE; j ++ ) {
|
||||
for ( int k = 0; k < 2; k ++ ) {
|
||||
q[i][j][k] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
|
||||
public LocalMapType map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
return new LocalMapType(context, ref, tracker);
|
||||
}
|
||||
|
||||
public int[][][] reduce(LocalMapType map, int[][][] qScores) {
|
||||
for ( int i = 0; i < map.numReads(); i ++ ) {
|
||||
SAMRecord read = map.context.getReads().get(i);
|
||||
int offset = map.context.getOffsets().get(i);
|
||||
if ( isMismatch(read, offset, map.ref) ) {
|
||||
qScores[calcZScoreBin(read,offset)][getQScoreAsInt(read,offset)][MM_OFFSET]++;
|
||||
} else {
|
||||
qScores[calcZScoreBin(read,offset)][getQScoreAsInt(read,offset)][MATCH_OFFSET]++;
|
||||
}
|
||||
}
|
||||
|
||||
return qScores;
|
||||
}
|
||||
|
||||
public void onTraversalDone( int[][][] zScoreBins ) {
|
||||
out.print( header() );
|
||||
for ( int i = 0; i < Z_SCORE_RANGE*Z_SCORE_MULTIPLIER; i ++ ) {
|
||||
for ( int j = 0; j < MAX_Q_SCORE; j ++ ) {
|
||||
out.print( formatData(zScoreBins[i][j], i, j) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isMismatch( SAMRecord read, int offset, ReferenceContext ref ) {
|
||||
return (Character.toUpperCase((char) read.getReadBases()[offset]) != ref.getBase());
|
||||
}
|
||||
|
||||
public int getQScoreAsInt( SAMRecord read, int offset ) {
|
||||
return (int) read.getBaseQualities()[offset];
|
||||
}
|
||||
|
||||
public int calcZScoreBin( SAMRecord read, int offset ) {
|
||||
Pair<Double,Double> meanVar = calcWindowMeanVariance(read, offset);
|
||||
double rawZ = (getQScoreAsInt(read, offset) - meanVar.first)/Math.sqrt(meanVar.second);
|
||||
int zBin;
|
||||
if ( rawZ >= Z_SCORE_MAX ) {
|
||||
zBin = (int) Math.floor(Z_SCORE_RANGE*Z_SCORE_MULTIPLIER);
|
||||
} else if ( rawZ <= Z_SCORE_MIN ) {
|
||||
zBin = 0;
|
||||
} else if ( rawZ > 0 ) {
|
||||
zBin = (int) Math.floor(rawZ*Z_SCORE_MULTIPLIER) - (int) Math.floor(Z_SCORE_MIN*Z_SCORE_MULTIPLIER);
|
||||
} else {
|
||||
zBin = (int) Math.floor(-Z_SCORE_MIN*Z_SCORE_MULTIPLIER) + (int) Math.floor(rawZ*Z_SCORE_MULTIPLIER);
|
||||
}
|
||||
|
||||
return zBin;
|
||||
}
|
||||
|
||||
public Pair<Double,Double> calcWindowMeanVariance( SAMRecord read, int offset ) {
|
||||
int start,end;
|
||||
if ( offset - WIN_SIDE_SIZE < 0 ) {
|
||||
start = 0;
|
||||
} else {
|
||||
start = offset - WIN_SIDE_SIZE;
|
||||
}
|
||||
|
||||
if ( offset + WIN_SIDE_SIZE > read.getReadLength() ) {
|
||||
end = read.getReadLength();
|
||||
} else {
|
||||
end = offset + WIN_SIDE_SIZE;
|
||||
}
|
||||
|
||||
double mean = 0.0;
|
||||
int n = end-start;
|
||||
byte[] quals = read.getBaseQualities();
|
||||
|
||||
for ( int i = start; i < end; i ++ ) {
|
||||
if ( i != offset ) {
|
||||
mean += ((double) quals[i])/n;
|
||||
}
|
||||
}
|
||||
|
||||
double var = 0.0;
|
||||
|
||||
for ( int i = start; i < end; i ++ ) {
|
||||
if ( i != offset ) {
|
||||
var += Math.pow(((double) quals[i] - mean),2)/n;
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<Double,Double>(mean, var);
|
||||
}
|
||||
|
||||
public String header() {
|
||||
String format = "%s\t%s\t%s\t%s\t%s\t%s%n";
|
||||
return String.format(format, "ZScore", "N_obs", "Expected_Mismatch", "Empirical_Mismatch", "Expected_MM_As_Q", "Empirical_MM_As_Q");
|
||||
}
|
||||
|
||||
public String formatData ( int[] matchMismatch, int zScoreBin, int q ) {
|
||||
String format = "%f\t%d\t%f\t%f\t%d\t%d%n";
|
||||
|
||||
double zScore = ( (double) zScoreBin )/Z_SCORE_MULTIPLIER;
|
||||
int counts = matchMismatch[MATCH_OFFSET] + matchMismatch[MM_OFFSET];
|
||||
double empMMR = (((double)matchMismatch[MM_OFFSET])/counts);
|
||||
double expMMR = QualityUtils.qualToErrorProb((byte) q);
|
||||
int empMMRAsQ = QualityUtils.probToQual(1-empMMR);
|
||||
|
||||
return String.format(format, zScore, counts, expMMR, empMMR, q, empMMRAsQ);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,139 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.Recalibration.LocalMapType;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Sep 29, 2009
|
||||
* Time: 10:54:18 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NQSCovariantByCountsWalker extends LocusWalker<LocalMapType, int[][][][] > {
|
||||
|
||||
final static int SMALL_DEVIATION = 5;
|
||||
final static int LARGE_DEVIATION = 8;
|
||||
final static int WIN_SIZE_SIDE = 6;
|
||||
final static int MATCH_OFFSET = 1;
|
||||
final static int MM_OFFSET = 0;
|
||||
final static int MAX_Q_SCORE = 2 + QualityUtils.MAX_REASONABLE_Q_SCORE;
|
||||
final static String DATA_FORMAT = "%d\t%d\t%d\t%d\t%f\t%f\t%d\t%d%n";
|
||||
final static String HEADER_FORMAT = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n";
|
||||
|
||||
int NGHD_SIZE;
|
||||
|
||||
public void initialize() {
|
||||
NGHD_SIZE = 2*WIN_SIZE_SIDE+1;
|
||||
}
|
||||
|
||||
public int[][][][] reduceInit() {
|
||||
int[][][][] q = new int[2*WIN_SIZE_SIDE][2*WIN_SIZE_SIDE][MAX_Q_SCORE][2];
|
||||
|
||||
for ( int i = 0; i < 2*WIN_SIZE_SIDE; i ++ ) {
|
||||
for ( int j = 0; j < 2*WIN_SIZE_SIDE; j ++ ) {
|
||||
for ( int k = 0; k < MAX_Q_SCORE; k ++ ) {
|
||||
for ( int h = 0; h < 2; h ++ ) {
|
||||
q[i][j][k][h] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
public LocalMapType map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
return new LocalMapType(context, ref, tracker);
|
||||
}
|
||||
|
||||
public int [][][][] reduce( LocalMapType map, int [][][][] quals ) {
|
||||
for ( int i = 0; i < map.numReads(); i ++ ) {
|
||||
SAMRecord read = map.context.getReads().get(i);
|
||||
int offset = map.context.getOffsets().get(i);
|
||||
Pair<Integer,Integer> deviantCounts = countNumDeviantBases(read,offset);
|
||||
|
||||
if ( isMismatch(read,offset,map.ref) ) {
|
||||
quals[deviantCounts.first][deviantCounts.second][(int) read.getBaseQualities()[offset]][MM_OFFSET]++;
|
||||
} else {
|
||||
quals[deviantCounts.first][deviantCounts.second][(int) read.getBaseQualities()[offset]][MATCH_OFFSET]++;
|
||||
}
|
||||
}
|
||||
|
||||
return quals;
|
||||
}
|
||||
|
||||
public void onTraversalDone( int[][][][] counts ) {
|
||||
out.print( header() );
|
||||
|
||||
for ( int i = 0; i < 2*WIN_SIZE_SIDE; i ++ ) {
|
||||
for ( int j = 0; j < 2*WIN_SIZE_SIDE; j ++ ) {
|
||||
out.print( formatData( counts[i][j], i, j ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Pair<Integer,Integer> countNumDeviantBases(SAMRecord read, int offset) {
|
||||
int start,end;
|
||||
if ( offset - WIN_SIZE_SIDE < 0 ) {
|
||||
start = 0;
|
||||
} else {
|
||||
start = offset - WIN_SIZE_SIDE;
|
||||
}
|
||||
|
||||
if ( offset + WIN_SIZE_SIDE > read.getReadLength() ) {
|
||||
end = read.getReadLength();
|
||||
} else {
|
||||
end = offset + WIN_SIZE_SIDE;
|
||||
}
|
||||
|
||||
int largeCounts = 0;
|
||||
int smallCounts = 0;
|
||||
byte[] quals = read.getBaseQualities();
|
||||
for ( int i = start; i < end; i ++ ) {
|
||||
if ( Math.abs(quals[i] - quals[offset]) >= LARGE_DEVIATION ) {
|
||||
largeCounts ++;
|
||||
} else if ( Math.abs( quals[i] - quals[offset] ) >= SMALL_DEVIATION ) {
|
||||
smallCounts ++;
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<Integer,Integer>(smallCounts,largeCounts);
|
||||
}
|
||||
|
||||
public boolean isMismatch( SAMRecord read, int offset, ReferenceContext ref ) {
|
||||
return ( Character.toUpperCase((char)read.getReadBases()[offset]) != ref.getBase() );
|
||||
}
|
||||
|
||||
public String header() {
|
||||
return String.format(HEADER_FORMAT, "N_SMALL_DEV", "N_LARGE_DEV", "MATCH", "MISMATCH", "EXPECTED_ERR", "EMPIRICAL_ERR", "EXP_AS_Q", "EMP_AS_Q");
|
||||
}
|
||||
|
||||
public String formatData( int[][] qScores, int smDev, int lgDev ) {
|
||||
int match = 0;
|
||||
int mismatch = 0;
|
||||
double expErr = 0.0;
|
||||
|
||||
for ( int i = 0; i < MAX_Q_SCORE; i ++ ) {
|
||||
match += qScores[i][MATCH_OFFSET];
|
||||
mismatch += qScores[i][MM_OFFSET];
|
||||
expErr += QualityUtils.qualToErrorProb((byte)i)*qScores[i][MATCH_OFFSET];
|
||||
expErr += QualityUtils.qualToErrorProb((byte)i)*qScores[i][MM_OFFSET];
|
||||
}
|
||||
|
||||
expErr = expErr/(match + mismatch);
|
||||
int expAsQ = QualityUtils.probToQual(1-expErr);
|
||||
|
||||
double empErr = ((double)mismatch)/(match+mismatch);
|
||||
int empAsQ = QualityUtils.probToQual(1-empErr);
|
||||
|
||||
return String.format(DATA_FORMAT, smDev, lgDev, match, mismatch, expErr, empErr, expAsQ, empAsQ);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,153 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.Recalibration.LocalMapType;
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Sep 23, 2009
|
||||
* Time: 11:26:52 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NQSExtendedGroupsCovariantWalker extends LocusWalker<LocalMapType, long[][][]> {
|
||||
|
||||
final int NEIGHBORHOOD_SIZE = 5;
|
||||
final int MM_OFFSET = 1;
|
||||
final int COUNT_OFFSET = 0;
|
||||
final int MAX_QSCORE = QualityUtils.MAX_REASONABLE_Q_SCORE+1;
|
||||
int NQS_GROUPS;
|
||||
|
||||
public static final String DATA_FORMAT = "%d\t%d\t%d\t%d\t%d%n";
|
||||
public static final String TEXT_FORMAT = "%s\t%s\t%s\t%s\t%s%n";
|
||||
|
||||
|
||||
// Walker methods
|
||||
|
||||
public void initialize() {
|
||||
NQS_GROUPS = (MAX_QSCORE)*(MAX_QSCORE+1)/2 + MAX_QSCORE;
|
||||
// this is from solving the recurrent equation
|
||||
// a_{n+1} = a_n + n + 1
|
||||
}
|
||||
|
||||
public long[][][] reduceInit() {
|
||||
long[][][] covariantCounts = new long[NQS_GROUPS][2][MAX_QSCORE];
|
||||
|
||||
for ( int i = 0; i < NQS_GROUPS; i ++ ) {
|
||||
for ( int j = 0; j < 2; j++ ) {
|
||||
for ( int k = 0; k < MAX_QSCORE; k ++) {
|
||||
covariantCounts[i][j][k] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return covariantCounts;
|
||||
}
|
||||
|
||||
public LocalMapType map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return new LocalMapType(context, ref, tracker);
|
||||
}
|
||||
|
||||
public long[][][] reduce( LocalMapType map, long[][][] cumulativeBins ) {
|
||||
|
||||
for ( int i = 0; i < map.numReads(); i ++ ) {
|
||||
int groupNumber = calcGroupNumber(i, map.context);
|
||||
int qualityAsInt = getBaseQualityAsInt(map.context,i);
|
||||
cumulativeBins[groupNumber][COUNT_OFFSET][qualityAsInt] ++;
|
||||
if ( ! isRef(i, map.context, map.ref ) ) {
|
||||
cumulativeBins[groupNumber][MM_OFFSET][qualityAsInt] ++;
|
||||
}
|
||||
}
|
||||
|
||||
return cumulativeBins;
|
||||
}
|
||||
|
||||
public void onTraversalDone(long[][][] cumulativeBins) {
|
||||
printNQSMismatchTable(cumulativeBins);
|
||||
}
|
||||
|
||||
// Map-Reduce helper methods
|
||||
|
||||
public int calcGroupNumber( int read, AlignmentContext context ) {
|
||||
return calcGroupNumberFromQualities(getBaseQualityAsInt(context, read), getMinNeighborhoodBaseQualityAsInt(context, read));
|
||||
}
|
||||
|
||||
/*
|
||||
* calculates group number as
|
||||
* #: nghd left base nghd right
|
||||
* 0: 000 0 000
|
||||
* 1: 000 1 000
|
||||
* 2: 111 1 111
|
||||
* 3: 000 2 000
|
||||
* 4: 111 2 111
|
||||
* 5: 222 2 222
|
||||
* 6: 000 3 000
|
||||
* 7: 111 3 111
|
||||
* etc
|
||||
*/
|
||||
public int calcGroupNumberFromQualities( int baseQuality, int minNeighborhoodQuality ) {
|
||||
int groupNumber = (baseQuality)*(baseQuality+1)/2;
|
||||
if ( minNeighborhoodQuality <= baseQuality ) {
|
||||
groupNumber += minNeighborhoodQuality;
|
||||
} else {
|
||||
groupNumber += baseQuality; // yes we want to do this
|
||||
}
|
||||
|
||||
return groupNumber;
|
||||
}
|
||||
|
||||
public int getBaseQualityAsInt( AlignmentContext context, int read ) {
|
||||
return (int) context.getReads().get(read).getBaseQualities()[context.getOffsets().get(read)];
|
||||
}
|
||||
|
||||
public int getMinNeighborhoodBaseQualityAsInt( AlignmentContext context, int read ) {
|
||||
byte[] qualities = context.getReads().get(read).getBaseQualities();
|
||||
int offset = context.getOffsets().get(read);
|
||||
int readLength = qualities.length;
|
||||
int start = (offset - NEIGHBORHOOD_SIZE < readLength) ? 0 : offset - NEIGHBORHOOD_SIZE;
|
||||
int end = (offset + NEIGHBORHOOD_SIZE > readLength) ? readLength : offset+NEIGHBORHOOD_SIZE;
|
||||
byte minQuality = Byte.MAX_VALUE;
|
||||
|
||||
for ( int i = start; i < end; i ++ ) {
|
||||
if ( i != offset ) {
|
||||
if ( qualities[i] < minQuality ) {
|
||||
minQuality = qualities[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (int) minQuality;
|
||||
}
|
||||
|
||||
public boolean isRef(int read, AlignmentContext context, ReferenceContext ref ) {
|
||||
return (context.getReads().get(read).getReadBases()[context.getOffsets().get(read)] == ref.getBase());
|
||||
}
|
||||
|
||||
// printing helper methods
|
||||
|
||||
public void printNQSMismatchTable(long[][][] cumulativeBins) {
|
||||
out.print(createHeader());
|
||||
for ( int qscore = 0; qscore < MAX_QSCORE; qscore ++ ) {
|
||||
for ( int group = 0; group < NQS_GROUPS; group ++ ) {
|
||||
out.print(formatNQSMismatchCountString(qscore,group,cumulativeBins));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String createHeader() {
|
||||
return String.format(TEXT_FORMAT, "Qscore_Reported", "NQS_Group", "Mismatches", "Total", "Empirical_Qscore");
|
||||
}
|
||||
|
||||
public String formatNQSMismatchCountString(int qscore, int group, long[][][] cumulativeBins) {
|
||||
long mm = cumulativeBins[group][MM_OFFSET][qscore];
|
||||
long ct = cumulativeBins[group][COUNT_OFFSET][qscore];
|
||||
double mmr = (ct > 0) ? mm/ct : -1;
|
||||
|
||||
return String.format(DATA_FORMAT, qscore, group, mm, ct, QualityUtils.probToQual(1-mmr));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,188 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.Recalibration.LocalMapType;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Sep 14, 2009
|
||||
* Time: 1:37:44 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NQSMismatchCovariantWalker extends LocusWalker<LocalMapType, long[][][]> {
|
||||
public static final int NQS_GROUPS = 35;
|
||||
public static final int NQS_RESOLUTION = 1;
|
||||
public static final int NQS_DIFFERENCE = 5;
|
||||
public static final int NEIGHBORHOOD_SIZE = 5;
|
||||
public static final int NQS_QSCORE_START = 5;
|
||||
public static final int MM_OFFSET = 0;
|
||||
public static final int COUNT_OFFSET = 1;
|
||||
public static final int FAIL_OFFSET = 2;
|
||||
public static final String DATA_FORMAT = "%d %d %d %d %d %d%n";
|
||||
public static final String HEADER_FORMAT = "%s %s %s %s %s %s%n";
|
||||
|
||||
public long[][][] reduceInit() {
|
||||
long[][][] mismatchesByNQS = new long[(int) QualityUtils.MAX_QUAL_SCORE+1][this.numNQSGroups()+1][2];
|
||||
for ( int qualityScore = 0; qualityScore <= (int) QualityUtils.MAX_QUAL_SCORE; qualityScore ++) {
|
||||
for ( int nqsGroup = 0; nqsGroup <= numNQSGroups(); nqsGroup++ ) {
|
||||
mismatchesByNQS[qualityScore][nqsGroup][MM_OFFSET] = 0;
|
||||
mismatchesByNQS[qualityScore][nqsGroup][COUNT_OFFSET] = 0;
|
||||
}
|
||||
}
|
||||
return mismatchesByNQS;
|
||||
}
|
||||
|
||||
public LocalMapType map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
return new LocalMapType(context,ref,tracker);
|
||||
}
|
||||
|
||||
public long[][][] reduce(LocalMapType rawMapData, long[][][] cumMismatchArray) {
|
||||
if ( ! isDBSNP(rawMapData.tracker) ) {
|
||||
for( int read = 0; read < rawMapData.numReads(); read ++ ) {
|
||||
int bestNQSRelativeToStart = getBestNQSRelativeToStart(rawMapData.context, read, nqsDifference(),
|
||||
nqsResolution(), centerBaseQualityStart(), windowSize());
|
||||
if( isValidRelativeNQS(bestNQSRelativeToStart) ) {
|
||||
cumMismatchArray[rawMapData.qScore(read)][bestNQSRelativeToStart][COUNT_OFFSET]++;
|
||||
if( isMismatch(read, rawMapData.context, rawMapData.ref) ) {
|
||||
cumMismatchArray[rawMapData.qScore(read)][bestNQSRelativeToStart][MM_OFFSET]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return cumMismatchArray;
|
||||
}
|
||||
|
||||
public void onTraversalDone(long[][][] cumulativeCounts) {
|
||||
printNQSMismatchTable(cumulativeCounts);
|
||||
}
|
||||
|
||||
public int getBestNQSRelativeToStart(AlignmentContext context, int readNum, int ctrNghdNQSDif, int nqsStepSize,
|
||||
int ctrBaseQStart, int windSize) {
|
||||
int minNeighborhoodQualityScore = findMinQScoreInWindow(context, readNum, windSize);
|
||||
int centerQScore = getQualityScore(context,readNum);
|
||||
int stepsToCutoffNeighborhood = (int) Math.floor((minNeighborhoodQualityScore - (ctrBaseQStart - ctrNghdNQSDif))/nqsStepSize);
|
||||
int stepsToCutoffCenter = (int) Math.floor((centerQScore-ctrBaseQStart )/nqsStepSize);
|
||||
|
||||
if(stepsToCutoffNeighborhood < stepsToCutoffCenter) {
|
||||
return stepsToCutoffNeighborhood;
|
||||
} else {
|
||||
return stepsToCutoffCenter;
|
||||
}
|
||||
}
|
||||
|
||||
protected void printNQSMismatchTable(long[][][] cumulativeCounts) {
|
||||
out.printf("%s", createHeader() );
|
||||
for ( int qscore = 0; qscore <= QualityUtils.MAX_QUAL_SCORE; qscore ++ ) {
|
||||
for( int nqsGroup = 0; nqsGroup <= numNQSGroups(); nqsGroup ++ ) {
|
||||
out.printf("%s", formatNQSMismatchCountString(cumulativeCounts, nqsGroup, qscore));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected String createHeader() {
|
||||
return String.format(HEADER_FORMAT,"Qscore Reported","Qscore Threshold at Locus", "Minimum Neighborhood Quality", "NQS Group",
|
||||
"# Non-dbSNP Mismatches", "Total Non-dbSNP Counts");
|
||||
}
|
||||
|
||||
protected String formatNQSMismatchCountString(long[][][] counts, int nqsGroup, int qscore) {
|
||||
return String.format(DATA_FORMAT,qscore,getCenterThreshold(nqsGroup),getNeighborhoodThreshold(nqsGroup),nqsGroup,
|
||||
counts[qscore][nqsGroup][MM_OFFSET], counts[qscore][nqsGroup][COUNT_OFFSET]);
|
||||
}
|
||||
|
||||
protected int getCenterThreshold(int nqsGroup) {
|
||||
return centerBaseQualityStart() + nqsGroup*nqsResolution();
|
||||
}
|
||||
|
||||
protected int getNeighborhoodThreshold(int nqsGroup) {
|
||||
return getCenterThreshold(nqsGroup)-nqsDifference();
|
||||
}
|
||||
|
||||
protected int numNQSGroups() {
|
||||
return NQS_GROUPS;
|
||||
}
|
||||
|
||||
protected int nqsResolution() {
|
||||
return NQS_RESOLUTION;
|
||||
}
|
||||
|
||||
protected int nqsDifference() {
|
||||
return NQS_DIFFERENCE;
|
||||
}
|
||||
|
||||
protected boolean isValidRelativeNQS(int relNQS) {
|
||||
return relNQS >= 0;
|
||||
}
|
||||
|
||||
protected boolean isDBSNP(RefMetaDataTracker tracker) {
|
||||
|
||||
return false;
|
||||
// return ( tracker.lookup("dbSNP",null) != null );
|
||||
}
|
||||
|
||||
protected boolean isMismatch(int readNum, AlignmentContext context, ReferenceContext ref) {
|
||||
return ( (char) getBases(context,readNum)[ getOffset(context,readNum) ] ) != ref.getBase();
|
||||
}
|
||||
|
||||
protected byte[] getBases(AlignmentContext context, int offset) {
|
||||
return context.getReads().get(offset).getReadBases();
|
||||
}
|
||||
|
||||
protected int getOffset(AlignmentContext context, int read) {
|
||||
return context.getOffsets().get(read);
|
||||
}
|
||||
|
||||
protected int windowSize() {
|
||||
return NEIGHBORHOOD_SIZE;
|
||||
}
|
||||
|
||||
protected int centerBaseQualityStart() {
|
||||
return NQS_QSCORE_START;
|
||||
}
|
||||
|
||||
protected int getQualityScore(AlignmentContext context, int readNo) {
|
||||
return getQualityScore(context.getReads().get(readNo), context.getOffsets().get(readNo));
|
||||
}
|
||||
|
||||
protected int getQualityScore(SAMRecord read, int offset) {
|
||||
return (int) read.getBaseQualities()[offset];
|
||||
}
|
||||
|
||||
protected int findMinQScoreInWindow(AlignmentContext context, int readNo, int windowSize) {
|
||||
SAMRecord read = context.getReads().get(readNo);
|
||||
int offset = context.getOffsets().get(readNo);
|
||||
int start;
|
||||
int end;
|
||||
|
||||
if ( offset - windowSize < 0) {
|
||||
start = 0;
|
||||
} else {
|
||||
start = offset - windowSize;
|
||||
}
|
||||
|
||||
if ( offset + windowSize > read.getReadLength() ) {
|
||||
end = read.getReadLength();
|
||||
} else {
|
||||
end = offset + windowSize;
|
||||
}
|
||||
|
||||
int minQualityScore = Integer.MAX_VALUE;
|
||||
for (int positionInRead = start; positionInRead < end; positionInRead++ ) {
|
||||
int qScoreAtPosition = getQualityScore(read,positionInRead);
|
||||
if ( qScoreAtPosition < minQualityScore ) {
|
||||
minQualityScore = qScoreAtPosition;
|
||||
}
|
||||
}
|
||||
|
||||
return minQualityScore;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -1,160 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: Oct 11, 2009
|
||||
* Time: 11:04:07 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NQSRecalibratorWalker extends ReadWalker<SAMRecord,SAMFileWriter> {
|
||||
@Argument(fullName="recalibrationTable", shortName="rt", doc="Table detailing NQS recalibration by reported, minimum, and maximum", required=true)
|
||||
String recalFile = null;
|
||||
@Argument(fullName="outputBamFile", shortName="outputBAM", doc="output BAM file", required=false)
|
||||
public SAMFileWriter outBam = null;
|
||||
|
||||
final static int MIN_RECALIBRATION_OBSERVATIONS = 10000;
|
||||
final static int WIN_SIDE = 11;
|
||||
final static int QMAX = 3 + QualityUtils.MAX_REASONABLE_Q_SCORE;
|
||||
|
||||
protected byte[][][] RECALIBRATION_TABLE;
|
||||
|
||||
public void initialize() {
|
||||
// initialize the table
|
||||
RECALIBRATION_TABLE = initializeQualityRecalibrationTable();
|
||||
BufferedReader reader;
|
||||
try {
|
||||
reader = new BufferedReader(new FileReader(recalFile));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("File "+recalFile+" not found",e);
|
||||
}
|
||||
try {
|
||||
String header = reader.readLine();
|
||||
logger.debug(header);
|
||||
while( reader.ready() ) {
|
||||
StringTokenizer tok = new StringTokenizer(reader.readLine());
|
||||
int repQ = Integer.valueOf(tok.nextToken());
|
||||
int minQ = Integer.valueOf(tok.nextToken());
|
||||
int maxQ = Integer.valueOf(tok.nextToken());
|
||||
int nObserv = Integer.valueOf(tok.nextToken());
|
||||
tok.nextToken(); // skip one - empirical mismatch rate
|
||||
int empQ = Integer.valueOf(tok.nextToken());
|
||||
if ( nObserv > MIN_RECALIBRATION_OBSERVATIONS ) {
|
||||
RECALIBRATION_TABLE[repQ][minQ][maxQ] = (byte) empQ;
|
||||
}
|
||||
// System.out.println(repQ);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new StingException("File "+recalFile+" could not be read",e);
|
||||
}
|
||||
}
|
||||
|
||||
public SAMFileWriter reduceInit() {
|
||||
return outBam;
|
||||
}
|
||||
|
||||
public SAMRecord map(char[] ref, SAMRecord read) {
|
||||
byte[] initialQualities = read.getBaseQualities();
|
||||
byte[] finalQualities = getNewQualitiesByNQS(initialQualities);
|
||||
return recalibrateRead(read, finalQualities);
|
||||
}
|
||||
|
||||
public byte[] getNewQualitiesByNQS( byte [] initQuals ) {
|
||||
byte [] newQuals = new byte[initQuals.length];
|
||||
for ( int offset = 0; offset < initQuals.length; offset ++ ) {
|
||||
newQuals[offset] = qualityByNQS(initQuals,offset);
|
||||
}
|
||||
|
||||
return newQuals;
|
||||
}
|
||||
|
||||
public SAMFileWriter reduce( SAMRecord newRead, SAMFileWriter outwriter ) {
|
||||
if ( outwriter == null ) {
|
||||
out.println(newRead);
|
||||
} else {
|
||||
outwriter.addAlignment(newRead);
|
||||
}
|
||||
|
||||
return outwriter;
|
||||
}
|
||||
|
||||
public byte qualityByNQS( byte [] quals, int offset ) {
|
||||
Pair<Byte,Byte> minMaxQuality = getMinMaxQuality(quals,offset);
|
||||
return nqsLookup( quals[offset] , minMaxQuality.getFirst() , minMaxQuality.getSecond() );
|
||||
}
|
||||
|
||||
public Pair<Byte,Byte> getMinMaxQuality( byte [] quals, int offset ) {
|
||||
int start;
|
||||
int end;
|
||||
if ( offset-WIN_SIDE < 0 ) {
|
||||
start = 0;
|
||||
} else {
|
||||
start = offset - WIN_SIDE;
|
||||
}
|
||||
|
||||
if ( offset + WIN_SIDE > quals.length ) {
|
||||
end = quals.length;
|
||||
} else {
|
||||
end = offset + WIN_SIDE;
|
||||
}
|
||||
|
||||
byte minQuality = Byte.MAX_VALUE;
|
||||
byte maxQuality = Byte.MIN_VALUE;
|
||||
|
||||
for ( int i = start; i < end; i ++ ) {
|
||||
if ( i != offset ) {
|
||||
if ( quals[i] < minQuality ) {
|
||||
minQuality = quals[i];
|
||||
}
|
||||
if ( quals[i] > maxQuality ) {
|
||||
maxQuality = quals[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<Byte,Byte>(minQuality,maxQuality);
|
||||
}
|
||||
|
||||
public byte nqsLookup( byte repQ, byte minQ, byte maxQ ) {
|
||||
return RECALIBRATION_TABLE[(int) repQ][(int) minQ][(int) maxQ];
|
||||
}
|
||||
|
||||
public SAMRecord recalibrateRead( SAMRecord read, byte[] newQualities ) {
|
||||
read.setBaseQualities(newQualities);
|
||||
return read;
|
||||
}
|
||||
|
||||
private byte[][][] initializeQualityRecalibrationTable() {
|
||||
byte [][][] table = new byte[QMAX][QMAX][QMAX];
|
||||
for ( int qrep = 0; qrep < QMAX; qrep ++ ) {
|
||||
for ( int qmin = 0; qmin < QMAX; qmin ++ ) {
|
||||
for ( int qmax = 0; qmax < QMAX; qmax ++ ) {
|
||||
table[qrep][qmin][qmax] = (byte) qrep;
|
||||
// default value is the reported q-score
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return table;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,181 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: Sep 24, 2009
|
||||
* Time: 10:40:29 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class NQSTabularDistributionWalker extends LocusWalker<LocalMapType, NQSDistributionTable> {
|
||||
|
||||
final int WINDOW_SIZE_SIDE = 3;
|
||||
final int QSCORE_BIN_SIZE = 4;
|
||||
final String ROW_FORMAT = "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d%n";
|
||||
final String HEAD_FORMAT = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n";
|
||||
|
||||
int qBins;
|
||||
int WINDOW_SIZE;
|
||||
|
||||
public void initialize() {
|
||||
qBins = 2 + (int) Math.ceil(((double)QualityUtils.MAX_REASONABLE_Q_SCORE)/QSCORE_BIN_SIZE);
|
||||
WINDOW_SIZE = 2*WINDOW_SIZE_SIDE+1;
|
||||
}
|
||||
|
||||
public NQSDistributionTable reduceInit() {
|
||||
return new NQSDistributionTable(WINDOW_SIZE, qBins, QSCORE_BIN_SIZE, logger);
|
||||
}
|
||||
|
||||
public LocalMapType map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return new LocalMapType(context, ref, tracker);
|
||||
}
|
||||
|
||||
public NQSDistributionTable reduce( LocalMapType curMap, NQSDistributionTable prevReduce ) {
|
||||
// iterate through reads
|
||||
for ( int i = 0; i < curMap.numReads(); i ++ ) {
|
||||
prevReduce.update(curMap.context.getReads().get(i), curMap.context.getOffsets().get(i), WINDOW_SIZE_SIDE, QSCORE_BIN_SIZE, curMap.ref);
|
||||
}
|
||||
|
||||
return prevReduce;
|
||||
}
|
||||
|
||||
public void onTraversalDone(NQSDistributionTable finalTable) {
|
||||
out.print( makeHeader() );
|
||||
for ( int a = 0; a < qBins; a ++ ) {
|
||||
for ( int b = 0; b < qBins; b ++ ) {
|
||||
for ( int c = 0; c < qBins; c ++ ) {
|
||||
for ( int d = 0; d < qBins; d ++ ) {
|
||||
for ( int e = 0; e < qBins; e ++ ) {
|
||||
for ( int f = 0; f < qBins; f ++ ) {
|
||||
for ( int g = 0; g < qBins; g ++ ) {
|
||||
Pair<Integer,Integer> mm = finalTable.getPair(a,b,c,d,e,f,g);
|
||||
out.print( formatOutput(a,b,c,d,e,f,g,mm.first,mm.second) );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String makeHeader() {
|
||||
return String.format(HEAD_FORMAT,"Q1","Q2","Q3","Q4","Q5","Q6","Q7","Mismatch","Match");
|
||||
}
|
||||
|
||||
public String formatOutput( int a, int b, int c, int d, int e, int f, int g, int h, int i ) {
|
||||
return String.format(ROW_FORMAT, a, b, c, d, e, f, g, h, i);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class NQSDistributionTable {
|
||||
|
||||
|
||||
public final int MM_OFFSET = 0;
|
||||
public final int MATCH_OFFSET = 1;
|
||||
|
||||
protected int [][][][][][][][] table;
|
||||
protected Logger logger;
|
||||
|
||||
protected int OFF_END_OFFSET;
|
||||
protected int OFF_START_OFFSET;
|
||||
|
||||
public NQSDistributionTable (int winSize, int qBins, int qStep, Logger logger) {
|
||||
if ( 7 != winSize ) {
|
||||
throw new StingException("Size positied in tabular distribution is not the size of the distribution table");
|
||||
}
|
||||
this.logger = logger;
|
||||
table = new int[qBins][qBins][qBins][qBins][qBins][qBins][qBins][2];
|
||||
// yes, this is absolutely positively horrendous brute-force code.
|
||||
// ... Knuth would be proud. These could be lists for a decrease
|
||||
// in memory but a slowdown in building the distribution.
|
||||
for ( int a = 0; a < qBins; a ++ ) {
|
||||
for ( int b = 0; b < qBins; b ++ ) {
|
||||
for ( int c = 0; c < qBins; c ++ ) {
|
||||
for ( int d = 0; d < qBins; d ++ ) {
|
||||
for ( int e = 0; e < qBins; e ++ ) {
|
||||
for ( int f = 0; f < qBins; f ++ ) {
|
||||
for ( int g = 0; g < qBins; g ++ ) {
|
||||
for ( int h = 0; h < 2; h ++ ) {
|
||||
table[a][b][c][d][e][f][g][h] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OFF_END_OFFSET = qBins-2;
|
||||
OFF_START_OFFSET = qBins-1;
|
||||
}
|
||||
|
||||
public void update( SAMRecord read, int offset, int winSize, int binSize, ReferenceContext ref ) {
|
||||
// note: there are two extra bins, one for "No quality: off the end of read"
|
||||
// and one for "No quality: off the beginning of read"
|
||||
List<Integer> coordinates = getQscoreCoordinates(read,offset,winSize, binSize);
|
||||
if ( isMatch(read, offset, ref) ) {
|
||||
coordinates.add(MATCH_OFFSET);
|
||||
} else {
|
||||
coordinates.add(MM_OFFSET);
|
||||
}
|
||||
ListIterator<Integer> o = coordinates.listIterator();
|
||||
table[o.next()][o.next()][o.next()][o.next()][o.next()][o.next()][o.next()][o.next()] ++;
|
||||
// i'm sure i could use recursion for this but my brain is too tired after swimming to figure
|
||||
// out, in 20 seconds, how to do it
|
||||
}
|
||||
|
||||
public List<Integer> getQscoreCoordinates( SAMRecord read, int offset, int win, int binSize ) {
|
||||
LinkedList<Integer> coords = new LinkedList<Integer>();
|
||||
for ( int i = offset - win; i <= offset + win ; i ++ ) {
|
||||
coords.add(getQscoreBin(getQScoreAsInt(read,i), binSize));
|
||||
}
|
||||
logger.debug(Integer.toString(coords.size()));
|
||||
return coords;
|
||||
}
|
||||
|
||||
public int getQScoreAsInt( SAMRecord read, int offset ) {
|
||||
int qscore;
|
||||
if ( offset < 0 ) {
|
||||
qscore = read.getReadNegativeStrandFlag() ? OFF_END_OFFSET : OFF_START_OFFSET;
|
||||
} else if ( offset >= read.getReadLength() ) {
|
||||
qscore = read.getReadNegativeStrandFlag() ? OFF_START_OFFSET : OFF_END_OFFSET;
|
||||
} else {
|
||||
qscore = (int) read.getBaseQualities()[offset];
|
||||
}
|
||||
|
||||
return qscore;
|
||||
}
|
||||
|
||||
public int getQscoreBin( int qscore, int binSize ) {
|
||||
return (int) Math.floor(((double)qscore)/binSize);
|
||||
}
|
||||
|
||||
public Pair<Integer,Integer> getPair( int a, int b, int c, int d, int e, int f, int g ) {
|
||||
int mm = table[a][b][c][d][e][f][g][MM_OFFSET];
|
||||
int ma = table[a][b][c][d][e][f][g][MATCH_OFFSET];
|
||||
|
||||
return new Pair<Integer,Integer>(mm,ma);
|
||||
}
|
||||
|
||||
public boolean isMatch( SAMRecord read, int offset, ReferenceContext ref ) {
|
||||
return ( Character.toUpperCase( (char) read.getReadBases()[offset] ) == ref.getBase() || ref.getBase() == 'N');
|
||||
}
|
||||
}
|
||||
|
|
@ -1,77 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Sep 28, 2009
|
||||
* Time: 12:02:33 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class QualityScoreDistributionWalker extends LocusWalker<List<Pair<Integer,Integer>>, int[][]> {
|
||||
@Argument(fullName="maxReadLength", shortName="rl", doc="Maximum read length in the bam file", required=true)
|
||||
protected int MAX_READ_LENGTH = 125;
|
||||
|
||||
|
||||
protected final int MAX_REASONABLE_Q_SCORE = 3 + QualityUtils.MAX_REASONABLE_Q_SCORE;
|
||||
public void initialize() {
|
||||
|
||||
}
|
||||
|
||||
public int[][] reduceInit() {
|
||||
int [][] qualityCounts = new int[MAX_READ_LENGTH][MAX_REASONABLE_Q_SCORE];
|
||||
for ( int i = 0; i < MAX_READ_LENGTH; i ++ ) {
|
||||
for ( int j = 0; j < MAX_REASONABLE_Q_SCORE; j ++ ) {
|
||||
qualityCounts[i][j] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return qualityCounts;
|
||||
}
|
||||
|
||||
public List<Pair<Integer,Integer>> map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
LinkedList<Pair<Integer,Integer>> qualByPos = new LinkedList<Pair<Integer,Integer>>();
|
||||
for ( int r = 0; r < context.numReads(); r ++ ) {
|
||||
if( context.getReads().get(r).getReadNegativeStrandFlag() ) {
|
||||
int of = context.getReads().get(r).getReadLength() - context.getOffsets().get(r);
|
||||
int qu = context.getReads().get(r).getBaseQualities()[context.getOffsets().get(r)];
|
||||
qualByPos.add(new Pair<Integer,Integer>(of, qu));
|
||||
}
|
||||
else {
|
||||
int of = context.getOffsets().get(r);
|
||||
int qu = context.getReads().get(r).getBaseQualities()[of];
|
||||
qualByPos.add(new Pair<Integer,Integer>(of, qu));
|
||||
}
|
||||
}
|
||||
|
||||
return qualByPos;
|
||||
}
|
||||
|
||||
public int[][] reduce( List<Pair<Integer,Integer>> qualByPos, int[][] counts ) {
|
||||
for ( Pair<Integer,Integer> qPosPair : qualByPos ) {
|
||||
counts[qPosPair.getFirst()][qPosPair.getSecond()] ++;
|
||||
}
|
||||
|
||||
return counts;
|
||||
}
|
||||
|
||||
public void onTraversalDone( int[][] counts ) {
|
||||
for ( int pos = 0; pos < MAX_READ_LENGTH; pos ++ ) {
|
||||
String outStr = Integer.toString(pos);
|
||||
for ( int qual = 0; qual < MAX_REASONABLE_Q_SCORE; qual ++) {
|
||||
outStr = outStr + "\t" + Integer.toString(counts[pos][qual]);
|
||||
}
|
||||
out.printf("%s%n", outStr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,218 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.playground.utils.PoolUtils;
|
||||
import org.broadinstitute.sting.playground.utils.ReadOffsetQuad;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileReader;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.StringTokenizer;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.List;
|
||||
import java.rmi.NoSuchObjectException;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Aug 25, 2009
|
||||
* Time: 3:47:47 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
/**
 * Extends CoverageAndPowerWalker to print, alongside the GATK power numbers,
 * the power reported by Syzygy for the same locus (read from a whitespace-
 * delimited Syzygy output file into an in-memory table at startup).
 *
 * NOTE(review): the chromosome/position tables below are hard-coded to a
 * specific 8-region target set (the author's own TODO calls this "a complete
 * hack"); do not reuse outside that context.
 */
public class AnalyzePowerWalker extends CoverageAndPowerWalker{
    // runs CoverageAndPowerWalker except compares to Syzygy outputs in a file

    @Argument(fullName = "SyzygyOutputFile", shortName = "sof", doc="Syzygy output file to compare to", required = true)
    String pathToSyzygyFile = null;
    @Argument(fullName = "ColumnOffset", shortName = "co", doc = "Offset of column containing the power in the pf", required = true)
    int colOffset = 0;

    BufferedReader syzyFileReader;
    final String pfFileDelimiter = " ";
    boolean outOfLinesInSyzyFile = false;
    // [region index][position index] -> Syzygy power; 1.5M positions per region.
    private double[][] syzyPowerTable;
    // Parallel table holding "chrm<chr>:<pos>" strings for sanity-checking lookups.
    private String[][] syzySanityCheckTable;

    /**
     * Allocates the lookup tables and eagerly loads the whole Syzygy file.
     * NOTE(review): syzyFileReader is never closed, and the two 8x1,500,000
     * tables cost ~150MB — acceptable only as a one-off analysis tool.
     */
    @Override
    public void initialize()
    {
        super.initialize();
        syzyPowerTable = new double[8][1500000];
        syzySanityCheckTable = new String[8][1500000];
        try {
            syzyFileReader = new BufferedReader(new FileReader(pathToSyzygyFile));
            logger.error("generatePowerTable called");
            generatePowerTable(syzyFileReader);
        } catch (FileNotFoundException e) {
            String newErrMsg = "Syzygy input file " + pathToSyzygyFile + " could be incorrect. File not found.";
            throw new StingException(newErrMsg,e);
        } catch (IOException e) {
            String newErrMsg = "Syzygy input file error: could not read first line of "+pathToSyzygyFile;
            throw new StingException(newErrMsg,e);
        }

    }

    /**
     * Same power computation as the superclass, but each printed line also
     * carries the Syzygy power and a sanity-check locus string from the table.
     */
    @Override
    public Pair<Integer,Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext rawContext)
    {
        AlignmentContext context;
        // Optionally drop bases below the superclass's minimum quality.
        if (super.getMinQualityScore() <= 0) {
            context = rawContext;
        } else {
            Pair<List<SAMRecord>,List<Integer>> readsFilteredByQuality = filterByQuality(rawContext.getReads(),rawContext.getOffsets(), super.getMinQualityScore());
            context = new AlignmentContext(rawContext.getLocation(),readsFilteredByQuality.getFirst(),readsFilteredByQuality.getSecond());
        }
        // Partition reads by strand, then repackage into the nested-Pair shape
        // that the superclass's calculatePower expects.
        ReadOffsetQuad readsByDirection = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
        Pair<Pair<List<SAMRecord>, List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> splitReads = new Pair(new Pair(readsByDirection.getFirstReads(),readsByDirection.getSecondReads()),new Pair(readsByDirection.getFirstOffsets(),readsByDirection.getSecondOffsets()));
        if ( !super.suppress_printing )
        {
            Pair<double[],byte[]> powPair = super.calculatePower(splitReads,false,context);



            // Map this locus into the hard-coded region tables; negative
            // indices mean the locus is outside the known target regions.
            int tabIndexChrom = getTableChromIndex(context.getLocation().toString());
            int tabIndexLoc = getTablePosIndex(context.getLocation().toString());
            double syzyPow = 0;
            String syzySanity = "NoLoc";
            if(tabIndexChrom >= 0 && tabIndexLoc >= 0) {
                syzyPow += syzyPowerTable[tabIndexChrom][tabIndexLoc];
                syzySanity = "s " + syzySanityCheckTable[tabIndexChrom][tabIndexLoc];

            }
            out.printf("%s|%s: %d %d %d %d %d %d %f %f %f %f %n", context.getLocation(), syzySanity, splitReads.getFirst().getFirst().size(), splitReads.getFirst().getSecond().size(),
                    context.getReads().size(), powPair.getSecond()[0], powPair.getSecond()[1], powPair.getSecond()[2],
                    powPair.getFirst()[0], powPair.getFirst()[1], powPair.getFirst()[2], syzyPow);

        }

        // NOTE(review): both Pair members are the forward-read count; the
        // second was presumably meant to be getFirst().getSecond().size() —
        // confirm against the superclass's return convention before fixing.
        return new Pair(splitReads.getFirst().getFirst().size(), splitReads.getFirst().getFirst().size());
    }

    /**
     * Streaming (line-at-a-time) Syzygy lookup: returns (power, locus string)
     * from the next line of the file.
     * NOTE(review): readLine() returns null at EOF without throwing, so the
     * IOException sentinel path below is never taken at normal EOF and the
     * StringTokenizer constructor would NPE instead — confirm this method is
     * unused (initialize() preloads the whole table) before relying on it.
     */
    public Pair<Double,String> getSyzyPowFromFile() {
        String thisLine = null;
        try {
            thisLine = syzyFileReader.readLine();
        } catch(IOException e) {
            String newErrMsg = "Ran out of lines in the syzyfile; further output of Syzygy power will be suppressed.";
            outOfLinesInSyzyFile=true;
            logger.warn(newErrMsg + " " + e.toString());
            return new Pair(-1.1, "Printing Stops Here");
        }

        String chromPosTar = null;
        StringTokenizer lineTokenizer = new StringTokenizer(thisLine, pfFileDelimiter);
        try {
            chromPosTar = lineTokenizer.nextToken();
            // Skip forward to the configured power column.
            for(int j = 1; j < colOffset; j++) {
                lineTokenizer.nextToken();
            }
            String chromPos = (new StringTokenizer(chromPosTar, "\t")).nextToken();
            // Syzygy reports power as a percentage; convert to a fraction.
            return new Pair((Double.valueOf(lineTokenizer.nextToken())/100.0),chromPos);
        } catch (NoSuchElementException e) {
            String errMsg = "The given column offset for the pool, " + colOffset + " exceeded the number of entries in the file " + pathToSyzygyFile;
            throw new StingException(errMsg);
        }
    }

    /** Appends the Syzygy power column to the superclass's header line. */
    public String createHeaderString() {
        return (super.createHeaderString() + " PowSyz");
    }

    //TODO: rewrite this so it's not a complete hack!!!

    /**
     * Loads every line of the Syzygy file into the power/sanity tables.
     * Lines are "chr<N>:<pos> ... power ..."; the power column is selected by
     * colOffset. Stops early on a blank line or missing chrom/pos fields.
     * NOTE(review): IOExceptions are swallowed (table left partially filled)
     * and progress goes to System.out rather than the logger.
     */
    public void generatePowerTable(BufferedReader syzFile) {

        System.out.println("generatePowerTable entered");
        int numreads = 0;

        try{
            while(syzFile.ready()) {
                String line = syzFile.readLine();
                if(line == null || line.equals("")) {
                    break;
                }
                StringTokenizer tok = new StringTokenizer(line, " :\t");
                String chrmNoStrWithChr = tok.nextToken();
                String chrmNoStr = chrmNoStrWithChr.substring(3); // strip leading "chr"
                String locStr = tok.nextToken();
                // Advance to the power column (chrom and pos consumed two tokens).
                for(int j = colOffset - 2; j > 0; j --) {
                    tok.nextToken();
                }
                numreads++;
                String syzyPowStr = tok.nextToken();
                if ( chrmNoStr == null || locStr == null || chrmNoStr.equals("") || locStr.equals("")) {
                    break;
                }
                int chrIndex = getTableChromIndex(chrmNoStr,locStr);
                int posIndex = getTablePosIndex(chrmNoStr,locStr);
                // NOTE(review): chrIndex/posIndex can be -1 for loci outside the
                // hard-coded regions, which would throw here — TODO confirm the
                // Syzygy file only covers the known regions.
                syzyPowerTable[chrIndex][posIndex] = Double.valueOf(syzyPowStr)/100.0;
                syzySanityCheckTable[chrIndex][posIndex] = "chrm" + chrmNoStr +":" + locStr;
                if( (numreads % 1000) == 0){
                    System.out.println(numreads + " reads from Syzygy file");
                }
            }
        } catch (IOException e) {
            // do nothing
            System.out.println("IOException caught");
        }
        System.out.println("Table generated.");

    }

    /**
     * Maps a chromosome (and, for chr3, a position) to one of the 8 hard-coded
     * region indices; returns -1 (and logs the locus) when unknown.
     * Chromosome 3 is split into two regions at position 63,000,000.
     */
    public int getTableChromIndex(String chromNo, String locNo) {
        switch (Integer.valueOf(chromNo)) {
            case 1: return 0;
            case 2: return 1;
            case 3:
                if ( Integer.valueOf(locNo) < 63000000 )
                    return 2;
                else
                    return 3;
            case 7: return 4;
            case 10: return 5;
            case 11: return 6;
            case 12: return 7;
            default:
                System.out.println(chromNo + " " + locNo);
                return -1;
        }
    }

    /** Convenience overload taking a "chr<N>:<pos>" locus string. */
    public int getTableChromIndex(String chromAndPos) {
        StringTokenizer tok = new StringTokenizer(chromAndPos,":");
        return getTableChromIndex(tok.nextToken().substring(3),tok.nextToken());
    }

    /** Convenience overload taking a "chr<N>:<pos>" locus string. */
    public int getTablePosIndex(String chromAndPos) {
        StringTokenizer tok = new StringTokenizer(chromAndPos,":");
        return getTablePosIndex(tok.nextToken().substring(3),tok.nextToken());
    }

    /**
     * Maps a genomic position to a 0-based offset within its hard-coded region
     * by subtracting that region's start coordinate; -1 when the region is
     * unknown. The magic numbers are the region start positions.
     */
    public int getTablePosIndex(String chromNo, String locNo) {
        switch ( getTableChromIndex(chromNo, locNo) ) {
            case 0: return Integer.valueOf(locNo) - 120065274;
            case 1: return Integer.valueOf(locNo) - 43311321;
            case 2: return Integer.valueOf(locNo) - 12157091;
            case 3: return Integer.valueOf(locNo) - 64000000;
            case 4: return Integer.valueOf(locNo) - 27838907;
            case 5: return Integer.valueOf(locNo) - 12003199;
            case 6: return Integer.valueOf(locNo) - 92342389;
            case 7: return Integer.valueOf(locNo) - 69809219;
            default:
                System.out.println(chromNo + " " + locNo);
                return -1;
        }
    }
}
|
||||
|
|
@ -1,232 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.ListUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.playground.utils.PoolUtils;
|
||||
import org.broadinstitute.sting.playground.utils.ReadOffsetQuad;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: Aug 18, 2009
|
||||
* Time: 11:57:07 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
@By(DataSource.REFERENCE)
|
||||
public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>, Pair<Long, Long>> {
|
||||
@Argument(fullName="suppressLocusPrinting",doc="Suppress printing",required=false)
|
||||
public boolean suppress_printing = false;
|
||||
|
||||
@Argument(fullName="poolSize", shortName="ps", doc="Number of individuals in pool", required=true)
|
||||
public int num_individuals = 0;
|
||||
|
||||
@Argument(fullName="bootStrap", shortName="bs", doc="Use a bootstrap method", required=false)
|
||||
public boolean useBootstrap = false;
|
||||
|
||||
@Argument(fullName="lodThreshold", shortName="lt", doc="Threshold for LOD score for calls", required=false)
|
||||
public double lodThreshold = 3.0;
|
||||
|
||||
@Argument(fullName="minQScore", shortName="qm", doc="Threshold for the minimum quality score, filter out bases below",required=false)
|
||||
public byte minQScore = 0;
|
||||
|
||||
@Argument(fullName="outputUnthresholdedCoverage", shortName = "uc", doc="Print out the total coverage as well as the coverage above the quality threshold", required = false)
|
||||
public boolean outputUnthreshCvg = false;
|
||||
|
||||
private static final int BOOTSTRAP_ITERATIONS = 300;
|
||||
|
||||
public void initialize() {
|
||||
|
||||
if(num_individuals <= 0)
|
||||
throw new IllegalArgumentException("Positive nonzero parameter expected for poolSize");
|
||||
}
|
||||
|
||||
public Pair<Long,Long> reduceInit() {
|
||||
if ( ! suppress_printing ) { // print header
|
||||
out.printf("%s%n",createHeaderString());
|
||||
}
|
||||
return new Pair(0l,0l);
|
||||
}
|
||||
|
||||
public Pair<Long,Long> reduce(Pair<Integer,Integer> newCvg, Pair<Long,Long> prevCvg) {
|
||||
return new Pair(prevCvg.first + newCvg.first, prevCvg.second + newCvg.second);
|
||||
}
|
||||
|
||||
/**
 * Per-locus work: optionally quality-filters the pileup, splits reads by
 * strand, computes power, and prints one line per locus. Returns the
 * (forward, reverse) read counts for accumulation in reduce.
 * When outputUnthreshCvg is set, the line additionally includes the
 * unfiltered per-strand and total coverage.
 */
public Pair<Integer,Integer> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
    AlignmentContext filteredContext;
    // Drop bases below the minimum quality, if a threshold was given.
    if (this.getMinQualityScore() <= 0) {
        filteredContext = context;
    } else {
        Pair<List<SAMRecord>,List<Integer>> readsFilteredByQuality = filterByQuality(context.getReads(),context.getOffsets(), this.getMinQualityScore());
        filteredContext = new AlignmentContext(context.getLocation(),readsFilteredByQuality.getFirst(),readsFilteredByQuality.getSecond());
    }
    // Partition by strand, then repackage into the nested-Pair shape that
    // calculatePower expects: ((fwdReads, revReads), (fwdOffsets, revOffsets)).
    ReadOffsetQuad splitReads = PoolUtils.splitReadsByReadDirection(filteredContext.getReads(), filteredContext.getOffsets());
    Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> readsByDirection = new Pair(new Pair(splitReads.getFirstReads(),splitReads.getSecondReads()),new Pair(splitReads.getFirstOffsets(),splitReads.getSecondOffsets()));
    if ( ! suppress_printing && ! outputUnthreshCvg ) {
        // Columns: locus, fwd depth, rev depth, total depth, 3 median Qs, 3 powers.
        Pair<double[],byte[]> powers = calculatePower(readsByDirection, useBootstrap, filteredContext);
        out.printf("%s %d %d %d %d %d %d %f %f %f%n", filteredContext.getLocation(), readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
                filteredContext.getReads().size(), powers.getSecond()[0], powers.getSecond()[1], powers.getSecond()[2],
                powers.getFirst()[0], powers.getFirst()[1], powers.getFirst()[2]);
    } else if (! suppress_printing && outputUnthreshCvg) {
        // Re-split the UNfiltered context so both raw and thresholded depths print.
        Pair<double[],byte[]> powers = calculatePower(readsByDirection, useBootstrap, filteredContext);
        ReadOffsetQuad readsByDir = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
        Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> ufReadsByDirection = new Pair(new Pair(readsByDir.getFirstReads(), readsByDir.getSecondReads()), new Pair(readsByDir.getFirstOffsets(), readsByDir.getSecondOffsets()));
        out.printf("%s %d %d %d %d %d %d %d %d %d %f %f %f%n", filteredContext.getLocation(), ufReadsByDirection.getFirst().getFirst().size(),
                ufReadsByDirection.getFirst().getSecond().size(), context.getReads().size(),
                readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
                filteredContext.getReads().size(), powers.getSecond()[0], powers.getSecond()[1], powers.getSecond()[2],
                powers.getFirst()[0], powers.getFirst()[1], powers.getFirst()[2]);
    }
    return new Pair(readsByDirection.getFirst().getFirst().size(),readsByDirection.getFirst().getSecond().size());
}
|
||||
|
||||
// helper methods
|
||||
|
||||
public Pair<double[],byte[]> calculatePower(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, boolean bootStrap, AlignmentContext context) {
|
||||
Pair<double[],byte[]> powerAndMedianQ;
|
||||
if( bootStrap ) {
|
||||
powerAndMedianQ = calculateDirectionalPowersByBootstrap(dirReads,lodThreshold,context);
|
||||
} else {
|
||||
powerAndMedianQ = calculateDirectionalPowersTheoretical(dirReads,lodThreshold,context);
|
||||
}
|
||||
|
||||
return powerAndMedianQ;
|
||||
}
|
||||
|
||||
/**
 * Theoretical power for forward, reverse, and combined read sets, each using
 * that set's median base quality as the error model.
 * @return ([fwd, rev, combined] powers, [fwd, rev, combined] median Qs)
 */
public Pair<double[],byte[]> calculateDirectionalPowersTheoretical(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, double thresh, AlignmentContext context) {
    byte[] medQs = getMedianQScores(dirReads, context);
    double powerForward = calculatePowerTheoretical(dirReads.getFirst().getFirst(),dirReads.getSecond().getFirst(),medQs[0],thresh);
    double powerReverse = calculatePowerTheoretical(dirReads.getFirst().getSecond(), dirReads.getSecond().getSecond(),medQs[1],thresh);
    double powerCombined = calculatePowerTheoretical(context.getReads(),context.getOffsets(),medQs[2],thresh);
    double [] powers = {powerForward,powerReverse,powerCombined};
    return new Pair(powers, medQs);
}
|
||||
|
||||
public byte[] getMedianQScores(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, AlignmentContext context) {
|
||||
byte medQForward = getMedianQualityScore(dirReads.getFirst().getFirst(),dirReads.getSecond().getFirst());
|
||||
byte medQReverse = getMedianQualityScore(dirReads.getFirst().getSecond(), dirReads.getSecond().getSecond());
|
||||
byte medQCombined = getMedianQualityScore(context.getReads(),context.getOffsets());
|
||||
byte [] medQs = {medQForward, medQReverse, medQCombined};
|
||||
return medQs;
|
||||
}
|
||||
|
||||
public byte getMedianQualityScore(List<SAMRecord> reads, List<Integer> offsets) {
|
||||
return ListUtils.getQScoreMedian(reads,offsets);
|
||||
}
|
||||
|
||||
public double calculatePowerTheoretical(List<SAMRecord> reads, List<Integer> offsets, byte medQ, double thresh) {
|
||||
double p_error = QualityUtils.qualToErrorProb(medQ);
|
||||
int depth = reads.size();
|
||||
if(depth <= 0) {
|
||||
return 0;
|
||||
}
|
||||
double snpProp = this.getSNPProportion(1);
|
||||
|
||||
// variable names from here on out come from the mathematics of a log likelihood test
|
||||
// with binomial probabilities, of observing k bases consistent with the same SNP
|
||||
// given that SNP occurs in one of the alleles, versus it does not occur.
|
||||
// Numerator and denominator will each have a term raised to the kth power
|
||||
// and a term raised to the (depth - kth) (or d-kth) power. Thus the names.
|
||||
// Taking a log then brings those powers out as coefficients, where they can be solved for.
|
||||
|
||||
|
||||
double kterm_num = Math.log10( snpProp * (1 - p_error) + (1 - snpProp) * (p_error/3) );
|
||||
double kterm_denom = Math.log10( p_error/3 );
|
||||
double dkterm_num = Math.log10( snpProp * (p_error/3) + (1 - snpProp) * (1 - p_error) );
|
||||
double dkterm_denom = Math.log10( 1 - p_error);
|
||||
|
||||
int kaccept = (int) Math.ceil( ( thresh - ( (double) depth ) * ( dkterm_num - dkterm_denom ) )/( kterm_num - kterm_denom- dkterm_num + dkterm_denom ) );
|
||||
|
||||
// we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs
|
||||
// we can optimize this by checking to see which sum is smaller
|
||||
|
||||
double pow = 0;
|
||||
|
||||
if ( depth - kaccept < kaccept ) {// kaccept > depth/2 - calculate power as P[hits between kaccept and depth]
|
||||
return MathUtils.cumBinomialProbLog(kaccept, depth, depth, snpProp);
|
||||
} else { // kaccept < depth/2 - calculate power as 1-P[hits between 0 and kaccept]
|
||||
return 1-MathUtils.cumBinomialProbLog(0,kaccept,depth,snpProp);
|
||||
}
|
||||
}
|
||||
|
||||
public double getSNPProportion(int numSNPAlleles) {
|
||||
return ((double) numSNPAlleles / (2.0 * num_individuals));
|
||||
}
|
||||
|
||||
public Pair<double[],byte[]> calculateDirectionalPowersByBootstrap(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> dirReads, double thresh, AlignmentContext context) {
|
||||
double powerForward = bootstrapPower(dirReads.getFirst().getFirst(),dirReads.getSecond().getFirst(),thresh);
|
||||
double powerReverse = bootstrapPower(dirReads.getFirst().getSecond(),dirReads.getSecond().getSecond(),thresh);
|
||||
double powerCombined = bootstrapPower(context.getReads(),context.getOffsets(),thresh);
|
||||
double[] powers = {powerForward, powerReverse, powerCombined};
|
||||
return new Pair(powers,getMedianQScores(dirReads,context));
|
||||
}
|
||||
|
||||
public double bootstrapPower(List<SAMRecord> reads, List<Integer> offsets, double thresh) {
|
||||
double power = 0.0;
|
||||
for ( int boot = 0; boot < BOOTSTRAP_ITERATIONS; boot++) {
|
||||
Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> snpReadsAndRefReads = coinTossPartition(reads,offsets,this.getSNPProportion(1));
|
||||
if( PoolUtils.calculateLogLikelihoodOfSample(snpReadsAndRefReads, num_individuals) > thresh) {
|
||||
power += 1.0/BOOTSTRAP_ITERATIONS;
|
||||
}
|
||||
}
|
||||
|
||||
return power;
|
||||
}
|
||||
|
||||
public String createHeaderString() {
|
||||
String headString;
|
||||
if(! outputUnthreshCvg ) {
|
||||
headString = "Chrom:Pos ForwardCoverage ReverseCoverage CombinedCoverage ForwardMedianQ ReverseMedianQ CombinedMedianQ PowerForward PowerReverse PowerCombined";
|
||||
} else {
|
||||
headString = "Chrom:Pos ForwardCoverage ReverseCoverage CombinedCoverage ForwardCoverageAboveThreshold ReverseCoverageAboveThreshold CombinedCoverageAboveThreshold ForwardMedianQ ReverseMedianQ CombinedMedianQ PowerForward PowerReverse PowerCombined";
|
||||
}
|
||||
return headString;
|
||||
}
|
||||
|
||||
public byte getMinQualityScore() {
|
||||
return minQScore;
|
||||
}
|
||||
|
||||
public Pair<List<SAMRecord>,List<Integer>> filterByQuality(List<SAMRecord> reads, List<Integer> offsets, byte qMin) {
|
||||
return PoolUtils.thresholdReadsByQuality(reads,offsets,qMin);
|
||||
}
|
||||
|
||||
// class methods
|
||||
|
||||
public static Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> coinTossPartition(List<SAMRecord> reads, List<Integer> offsets, double snpProb) {
|
||||
Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> partitionedReads;
|
||||
if( reads == null || offsets == null ) {
|
||||
partitionedReads = null;
|
||||
} else {
|
||||
ArrayList<SAMRecord> snpReads = new ArrayList();
|
||||
ArrayList<SAMRecord> refReads = new ArrayList();
|
||||
ArrayList<Integer> snpOff = new ArrayList();
|
||||
ArrayList<Integer> refOff = new ArrayList();
|
||||
for(int readNo = 0; readNo < reads.size(); readNo++) {
|
||||
if ( Math.random() < snpProb) {
|
||||
snpReads.add(reads.get(readNo));
|
||||
snpOff.add(offsets.get(readNo));
|
||||
} else {
|
||||
refReads.add(reads.get(readNo));
|
||||
refOff.add(offsets.get(readNo));
|
||||
}
|
||||
}
|
||||
partitionedReads= new Pair(new Pair(snpReads,refReads), new Pair(snpOff, refOff));
|
||||
}
|
||||
|
||||
return partitionedReads;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,262 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||
|
||||
import org.broadinstitute.sting.playground.utils.SQuad;
|
||||
import org.broadinstitute.sting.playground.utils.ReadOffsetQuad;
|
||||
import org.broadinstitute.sting.playground.utils.PoolUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.By;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: Sep 12, 2009
|
||||
* Time: 12:20:42 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@By(DataSource.REFERENCE)
|
||||
public class PowerAndCoverageWalker extends LocusWalker<SQuad<Integer>, SQuad<Long>> {
|
||||
|
||||
@Argument(fullName="suppressLocusPrinting", doc="Walker does not output locus data to any file. Only total coverage information is given at the end", required=false)
|
||||
public boolean suppressPrinting = false;
|
||||
|
||||
@Argument(fullName="outputFile", shortName="of", doc="Locus information is printed to this file instead of standard out", required = false)
|
||||
public String outputFile = null;
|
||||
|
||||
@Argument(fullName="bootStrapIterations", shortName="bs", doc="Use a bootstrap method with this many iterations", required=false)
|
||||
public int bootstrapIterations = 0;
|
||||
|
||||
@Argument(fullName="lodThreshold", shortName="lod", doc="Threshold for log likelihood ratio to be called a SNP. Defaults to 3.0", required = false)
|
||||
public double lodThresh = 3.0;
|
||||
|
||||
@Argument(fullName="minimumQScore", shortName="qm", doc="Use bases whose phred (Q) score meets or exceeds this number. Defaults to 0", required = false)
|
||||
public byte minQ = 0;
|
||||
|
||||
@Argument(fullName="aboveQScoreOutputOnly", doc="Output only the information for reads that exceed the q-score threshold", required=false)
|
||||
public boolean aboveQScoreOutputOnly = false;
|
||||
|
||||
@Argument(fullName="outputRawStatistics", shortName="rs", doc="Walker outputs the median quality score and power for the un-thresholded reads as well as those that are thresholded", required=false)
|
||||
public boolean outputRawStatistics = false;
|
||||
|
||||
@Argument(fullName="poolSize", shortName="ps", doc="Number of individuals in the pool", required = true)
|
||||
public int numIndividuals = 0;
|
||||
|
||||
protected PrintStream outputWriter = null;
|
||||
|
||||
public void initialize() {
|
||||
if(numIndividuals <= 0) {
|
||||
throw new StingException("Pool size must be greater than 1. You input "+numIndividuals);
|
||||
}
|
||||
|
||||
if( outputFile != null ) {
|
||||
try {
|
||||
outputWriter = new PrintStream(outputFile);
|
||||
} catch (FileNotFoundException e) {
|
||||
String errMsg = "The filepath input as an argument to -of, "+outputFile+" was a directory or could not be found or created.";
|
||||
throw new StingException(errMsg, e);
|
||||
}
|
||||
outputWriter.printf("%s%n", generateHeaderString());
|
||||
} else if( ! suppressPrinting ) {
|
||||
out.printf("%s%n", generateHeaderString());
|
||||
}
|
||||
}
|
||||
|
||||
public SQuad<Long> reduceInit() {
|
||||
return new SQuad<Long>(0l,0l,0l,0l);
|
||||
}
|
||||
|
||||
/* REDUCE
|
||||
* Add up filtered and unfiltered coverage in the forward
|
||||
* and reverse directions.
|
||||
*/
|
||||
public SQuad<Long> reduce( SQuad<Integer> newCvg, SQuad<Long> oldCvg) {
|
||||
return new SQuad<Long>(newCvg.getFirst() + oldCvg.getFirst(), newCvg.getSecond() + oldCvg.getSecond(),
|
||||
newCvg.getThird() + oldCvg.getThird(), newCvg.getFourth() + oldCvg.getFourth());
|
||||
}
|
||||
/* MAP
|
||||
* Get the coverage in each direction above and below threshold and calculate the estimated power
|
||||
* if printing is not suppressed print this information to the file
|
||||
* pass coverage information on to Reduce
|
||||
*/
|
||||
public SQuad<Integer> map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
ReadOffsetQuad readsByDirection = splitReadsByDirection(context.getReads(),context.getOffsets());
|
||||
ReadOffsetQuad readsByDirectionAboveThresh;
|
||||
|
||||
if(minQ > 0) {
|
||||
readsByDirectionAboveThresh = thresholdReadsByQuality(readsByDirection);
|
||||
} else {
|
||||
readsByDirectionAboveThresh = readsByDirection;
|
||||
}
|
||||
|
||||
|
||||
SQuad<Byte> medianQScoresAboveThresh = getMedianQScores(readsByDirectionAboveThresh);
|
||||
SQuad<Double> powerAboveThresh = calculatePower(readsByDirectionAboveThresh, medianQScoresAboveThresh);
|
||||
SQuad<Byte> medianQScoresUnthresh;
|
||||
SQuad<Double> powerUnthresh;
|
||||
|
||||
if(! outputRawStatistics) {
|
||||
medianQScoresUnthresh = null;
|
||||
powerUnthresh = null;
|
||||
} else {
|
||||
medianQScoresUnthresh = getMedianQScores(readsByDirection);
|
||||
powerUnthresh= calculatePower(readsByDirection, medianQScoresUnthresh);
|
||||
}
|
||||
|
||||
if ( ! suppressPrinting && outputWriter == null) {
|
||||
out.printf("%s%n",outputString(readsByDirection,readsByDirectionAboveThresh,powerAboveThresh, powerUnthresh,
|
||||
medianQScoresAboveThresh, medianQScoresUnthresh, context.getLocation()));
|
||||
} else if (! suppressPrinting && outputWriter != null) {
|
||||
outputWriter.printf("%s%n",outputString(readsByDirection,readsByDirectionAboveThresh,powerAboveThresh, powerUnthresh,
|
||||
medianQScoresAboveThresh, medianQScoresUnthresh, context.getLocation()));
|
||||
}
|
||||
|
||||
return new SQuad<Integer>(readsByDirection.numReadsFirst(), readsByDirection.numReadsSecond(),
|
||||
readsByDirectionAboveThresh.numReadsFirst(), readsByDirectionAboveThresh.numReadsSecond());
|
||||
|
||||
}
|
||||
|
||||
// private helper methods
|
||||
|
||||
/*
|
||||
* Make different headers for the output file depending on printing-related command line arguments
|
||||
*/
|
||||
private String generateHeaderString() {
|
||||
String header;
|
||||
if (minQ <= 0) {
|
||||
header = "Chrom:Pos CoverageFwd CoverageRev CoverageCom MedianQFwd MedianQRev MedianQCom PowerFwd PowerRev PowerCom";
|
||||
} else if (! aboveQScoreOutputOnly && ! outputRawStatistics){
|
||||
String m = Byte.toString(minQ);
|
||||
header = "Chrom:Pos CoverageFwd CoverageRev CoverageCom CoverageAboveQ"+m+"Fwd CoverageAboveQ"+m+"Rev CoverageAboveQ"+m+"Com MedianQFwd MedianQRev MedianQCom PowerFwd PowerRev PowerCom";
|
||||
} else if (! outputRawStatistics){
|
||||
header = "Chrom:Pos CoverageFwd CoverageRev CoverageCom MedianQFwd MedianQRev MedianQCom PowerFwd PowerRev PowerCom";
|
||||
} else {
|
||||
String m = Byte.toString(minQ);
|
||||
header = "Chrom:Pos CoverageFwd CoverageRev CoverageCom CoverageAboveQ"+m+"Fwd CoverageAboveQ"+m+"Rev "+
|
||||
"CoverageAboveQ"+m+"Com RawMedianQFwd RawMedianQRev RawMedianQCom MedianQAboveQ"+m+"Fwd "+
|
||||
"MedianQAboveQ"+m+"Rev MedianQAboveQ"+m+"Com RawPowerFwd RawPowerRev RawPowerCom "+
|
||||
"PowerAboveQ"+m+"Fwd PowerAboveQ"+m+"Rev PowerAboveQ"+m+"Com";
|
||||
}
|
||||
|
||||
return header;
|
||||
}
|
||||
/*
|
||||
* Make different output strings depending on printing-related command-line options
|
||||
*/
|
||||
private String outputString(ReadOffsetQuad reads, ReadOffsetQuad threshReads, SQuad<Double> pow, SQuad<Double> rawPow, SQuad<Byte> medQ, SQuad<Byte> rawQ, GenomeLoc loc) {
|
||||
String outputString;
|
||||
if(minQ <= 0) {
|
||||
outputString = String.format("%s %d %d %d %d %d %d %f %f %f", loc.toString(), reads.numReadsFirst(), reads.numReadsSecond(), reads.numReads(),
|
||||
medQ.getFirst(), medQ.getSecond(), medQ.getThird(), pow.getFirst(), pow.getSecond(), pow.getThird());
|
||||
} else if (! aboveQScoreOutputOnly && ! outputRawStatistics ){
|
||||
outputString = String.format("%s %d %d %d %d %d %d %d %d %d %f %f %f", loc.toString(), reads.numReadsFirst(), reads.numReadsSecond(), reads.numReads(),
|
||||
threshReads.numReadsFirst(), threshReads.numReadsSecond(), threshReads.numReads(), medQ.getFirst(),
|
||||
medQ.getSecond(), medQ.getThird(), pow.getFirst(), pow.getSecond(), pow.getThird());
|
||||
} else if (! outputRawStatistics ){
|
||||
outputString = String.format("%s %d %d %d %d %d %d %f %f %f", loc.toString(), threshReads.numReadsFirst(), threshReads.numReadsSecond(),
|
||||
threshReads.numReads(), medQ.getFirst(), medQ.getSecond(), medQ.getThird(), pow.getFirst(),
|
||||
pow.getSecond(), pow.getThird());
|
||||
} else {
|
||||
outputString = String.format("%s %d %d %d %d %d %d %d %d %d %d %d %d %f %f %f %f %f %f", loc.toString(), reads.numReadsFirst(),
|
||||
reads.numReadsSecond(), reads.numReads(), threshReads.numReadsFirst(), threshReads.numReadsSecond(),
|
||||
threshReads.numReads(), rawQ.getFirst(), rawQ.getSecond(), rawQ.getThird(), medQ.getFirst(),
|
||||
medQ.getSecond(), medQ.getThird(), rawPow.getFirst(), rawPow.getSecond(), rawPow.getThird(),
|
||||
pow.getFirst(), pow.getSecond(), pow.getThird());
|
||||
}
|
||||
return outputString;
|
||||
}
|
||||
|
||||
private ReadOffsetQuad splitReadsByDirection(List<SAMRecord> reads, List<Integer> offsets) {
|
||||
return PoolUtils.splitReadsByReadDirection(reads,offsets);
|
||||
}
|
||||
|
||||
private ReadOffsetQuad thresholdReadsByQuality(ReadOffsetQuad readQuad) {
|
||||
return new ReadOffsetQuad(PoolUtils.thresholdReadsByQuality(readQuad.getFirstReadOffsetPair(), minQ),
|
||||
PoolUtils.thresholdReadsByQuality(readQuad.getSecondReadOffsetPair(), minQ));
|
||||
}
|
||||
|
||||
private SQuad<Byte> getMedianQScores(ReadOffsetQuad reads) {
|
||||
return new SQuad<Byte>(ListUtils.getQScoreMedian(reads.getFirstReads(), reads.getFirstOffsets()),
|
||||
ListUtils.getQScoreMedian(reads.getSecondReads(), reads.getSecondOffsets()),
|
||||
ListUtils.getQScoreMedian(reads.getReadsCombined(), reads.getOffsetsCombined()),
|
||||
(byte) 0);
|
||||
}
|
||||
|
||||
private SQuad<Double> calculatePower(ReadOffsetQuad readQuad, SQuad<Byte> medianQs) {
|
||||
SQuad<Double> power;
|
||||
if( bootstrapIterations > 0 ) {
|
||||
power = bootstrapPowerByDirection(readQuad);
|
||||
} else {
|
||||
power = theoreticalPowerByDirection(readQuad,medianQs);
|
||||
}
|
||||
return power;
|
||||
}
|
||||
|
||||
private SQuad<Double> theoreticalPowerByDirection(ReadOffsetQuad readQuad, SQuad<Byte> medianQs) {
|
||||
return new SQuad<Double>( theoreticalPower(readQuad.numReadsFirst(), medianQs.getFirst()),
|
||||
theoreticalPower(readQuad.numReadsSecond(), medianQs.getSecond()),
|
||||
theoreticalPower(readQuad.numReads(), medianQs.getThird()),
|
||||
0.0 );
|
||||
}
|
||||
|
||||
private SQuad<Double> bootstrapPowerByDirection(ReadOffsetQuad readQuad) {
|
||||
return new SQuad<Double> ( bootstrapPower(readQuad.getFirstReads(), readQuad.getFirstOffsets()),
|
||||
bootstrapPower(readQuad.getSecondReads(), readQuad.getSecondOffsets()),
|
||||
bootstrapPower(readQuad.getReadsCombined(), readQuad.getOffsetsCombined()),
|
||||
0.0 );
|
||||
}
|
||||
|
||||
private double theoreticalPower(int depth, byte q) {
|
||||
double power;
|
||||
if( depth <= 0 ) {
|
||||
power = 0.0;
|
||||
} else {
|
||||
double p_error = QualityUtils.qualToErrorProb(q);
|
||||
double snpProp = getSNPProportion();
|
||||
double kterm_num = Math.log10( snpProp * (1 - p_error) + (1 - snpProp) * (p_error/3) );
|
||||
double kterm_denom = Math.log10( p_error/3 );
|
||||
double dkterm_num = Math.log10( snpProp * (p_error/3) + (1 - snpProp) * (1 - p_error) );
|
||||
double dkterm_denom = Math.log10( 1 - p_error);
|
||||
|
||||
int kaccept = (int) Math.ceil( ( lodThresh - ( (double) depth ) * ( dkterm_num - dkterm_denom ) ) /
|
||||
( kterm_num - kterm_denom- dkterm_num + dkterm_denom ) );
|
||||
|
||||
// we will reject the null hypothesis if we see kaccept or more SNPs, the power is the probability that this occurs
|
||||
// we can optimize this by checking to see which sum is smaller
|
||||
|
||||
if ( depth - kaccept < kaccept ) {// kaccept > depth/2 - calculate power as P[hits between kaccept and depth]
|
||||
power = MathUtils.cumBinomialProbLog(kaccept, depth, depth, snpProp);
|
||||
} else { // kaccept < depth/2 - calculate power as 1-P[hits between 0 and kaccept]
|
||||
power = 1-MathUtils.cumBinomialProbLog(0,kaccept,depth,snpProp);
|
||||
}
|
||||
}
|
||||
|
||||
return power;
|
||||
}
|
||||
|
||||
private double bootstrapPower(List<SAMRecord> reads, List<Integer> offsets) {
|
||||
double power = 0.0;
|
||||
double snpProp = getSNPProportion();
|
||||
for ( int boot = 0; boot < bootstrapIterations; boot ++) {
|
||||
ReadOffsetQuad partition = PoolUtils.coinTossPartition(reads, offsets, getSNPProportion());
|
||||
if ( PoolUtils.calculateLogLikelihoodOfSample(partition, snpProp) >= lodThresh ) {
|
||||
power += 1.0/bootstrapIterations;
|
||||
}
|
||||
}
|
||||
return power;
|
||||
}
|
||||
|
||||
private double getSNPProportion() {
|
||||
return 1/(2.0*numIndividuals);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -44,6 +44,12 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
|
|||
@Argument(fullName="minimumMappingQuality", shortName="mmq", doc="Only use reads above this mapping quality in the power calculation", required=false)
|
||||
int minMappingQuality = -1;
|
||||
|
||||
@Argument(fullName="ignoreForwardReads",doc="Use the forward reads at a site. Defaults to true.", required = false)
|
||||
boolean ignoreForwardReads = true;
|
||||
|
||||
@Argument(fullName="ignoreReverseReads",doc="Use the reverse reads at a site. Defaults to true.", required = false)
|
||||
boolean ignoreReverseReads = true;
|
||||
|
||||
public void initialize() {
|
||||
if ( alleleFreq < 1 ) {
|
||||
String err = "Allele frequency (-af) must be greater than or equal to one.";
|
||||
|
|
@ -64,6 +70,16 @@ public class PowerBelowFrequencyWalker extends LocusWalker<Integer,Integer> {
|
|||
String output = String.format("%s", context.getLocation().toString());
|
||||
|
||||
// threshold reads if necessary
|
||||
if ( ignoreForwardReads && ignoreReverseReads) {
|
||||
throw new StingException("User has elected to ignore both forward and reverse reads. Power is zero.");
|
||||
}
|
||||
else if ( ! ignoreForwardReads && ignoreReverseReads ) {
|
||||
org.broadinstitute.sting.playground.gatk.walkers.poolseq.ReadOffsetQuad rq = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
|
||||
context = new AlignmentContext(context.getLocation(),rq.getFirstReads(),rq.getFirstOffsets());
|
||||
} else if ( ignoreForwardReads && ! ignoreReverseReads ) {
|
||||
org.broadinstitute.sting.playground.gatk.walkers.poolseq.ReadOffsetQuad rq = PoolUtils.splitReadsByReadDirection(context.getReads(),context.getOffsets());
|
||||
context = new AlignmentContext(context.getLocation(),rq.getSecondReads(),rq.getSecondOffsets());
|
||||
}
|
||||
|
||||
if ( minQ > 0 ) {
|
||||
Pair<List<SAMRecord>, List<Integer>> thresh = PoolUtils.thresholdReadsByQuality(context.getReads(),context.getOffsets(),minQ);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
package org.broadinstitute.sting.playground.gatk.walkers.poolseq;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -8,6 +8,7 @@ import java.util.ArrayList;
|
|||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.poolseq.ReadOffsetQuad;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -21,39 +22,6 @@ public class PoolUtils {
|
|||
private PoolUtils() {
|
||||
}
|
||||
|
||||
public static final int BASE_A_OFFSET = 0;
|
||||
public static final int BASE_C_OFFSET = 1;
|
||||
public static final int BASE_G_OFFSET = 2;
|
||||
public static final int BASE_T_OFFSET = 3;
|
||||
public static final int BASE_INDEXED_ARRAY_SIZE = 4;
|
||||
|
||||
public static Pair<List<SAMRecord>, List<Integer>> splitReadsByIndels( List<SAMRecord> reads, List<Integer> offsets, boolean returnBases ) {
|
||||
|
||||
List<SAMRecord> baseReads = new ArrayList<SAMRecord>();
|
||||
List<SAMRecord> indelReads = new ArrayList<SAMRecord>();
|
||||
List<Integer> baseOffsets = new ArrayList<Integer>();
|
||||
List<Integer> indelOffsets = new ArrayList<Integer>();
|
||||
|
||||
|
||||
for ( int r = 0; r < reads.size(); r ++ ) {
|
||||
SAMRecord read = reads.get(r);
|
||||
int offset = offsets.get(r);
|
||||
if (BaseUtils.isRegularBase( (char) read.getReadBases()[offset] ) ) {
|
||||
baseReads.add(read);
|
||||
baseOffsets.add(offset);
|
||||
} else {
|
||||
indelReads.add(read);
|
||||
indelOffsets.add(offset);
|
||||
}
|
||||
}
|
||||
|
||||
if (returnBases) {
|
||||
return new Pair<List<SAMRecord>,List<Integer>>(baseReads,baseOffsets);
|
||||
} else {
|
||||
return new Pair<List<SAMRecord>,List<Integer>>(indelReads,indelOffsets);
|
||||
}
|
||||
}
|
||||
|
||||
public static ReadOffsetQuad splitReadsByReadDirection(List<SAMRecord> reads, List<Integer> offsets) {
|
||||
ArrayList<SAMRecord> forwardReads;
|
||||
ArrayList<SAMRecord> reverseReads;
|
||||
|
|
@ -66,10 +34,10 @@ public class PoolUtils {
|
|||
forwardOffsets = null;
|
||||
reverseOffsets = null;
|
||||
} else {
|
||||
forwardReads = new ArrayList();
|
||||
reverseReads = new ArrayList();
|
||||
forwardOffsets = new ArrayList();
|
||||
reverseOffsets = new ArrayList();
|
||||
forwardReads = new ArrayList<SAMRecord>();
|
||||
reverseReads = new ArrayList<SAMRecord>();
|
||||
forwardOffsets = new ArrayList<Integer>();
|
||||
reverseOffsets = new ArrayList<Integer>();
|
||||
|
||||
for (int readNo = 0; readNo < reads.size(); readNo++) {
|
||||
if (reads.get(readNo).getReadNegativeStrandFlag()) {
|
||||
|
|
@ -85,44 +53,6 @@ public class PoolUtils {
|
|||
return new ReadOffsetQuad(forwardReads,forwardOffsets,reverseReads,reverseOffsets);
|
||||
}
|
||||
|
||||
public static Pair<List<SAMRecord>[], List<Integer>[]> splitReadsByBase(List<SAMRecord> reads, List<Integer> offsets) {
|
||||
ArrayList<SAMRecord>[] readsByBase;
|
||||
ArrayList<Integer>[] offsetsByBase;
|
||||
if (reads == null) {
|
||||
readsByBase = null;
|
||||
offsetsByBase = null;
|
||||
} else {
|
||||
readsByBase = new ArrayList[4];
|
||||
offsetsByBase = new ArrayList[4];
|
||||
for (int readNum = 0; readNum < reads.size(); readNum++) {
|
||||
switch (reads.get(readNum).getReadBases()[offsets.get(readNum)]) {
|
||||
case 'A':
|
||||
case 'a':
|
||||
readsByBase[BASE_A_OFFSET].add(reads.get(readNum));
|
||||
offsetsByBase[BASE_A_OFFSET].add(offsets.get(readNum));
|
||||
break;
|
||||
case 'C':
|
||||
case 'c':
|
||||
readsByBase[BASE_C_OFFSET].add(reads.get(readNum));
|
||||
offsetsByBase[BASE_C_OFFSET].add(offsets.get(readNum));
|
||||
break;
|
||||
case 'G':
|
||||
case 'g':
|
||||
readsByBase[BASE_G_OFFSET].add(reads.get(readNum));
|
||||
offsetsByBase[BASE_G_OFFSET].add(offsets.get(readNum));
|
||||
break;
|
||||
case 'T':
|
||||
case 't':
|
||||
readsByBase[BASE_T_OFFSET].add(reads.get(readNum));
|
||||
offsetsByBase[BASE_T_OFFSET].add(offsets.get(readNum));
|
||||
break;
|
||||
default:
|
||||
break; // TODO: INDEL AWARENESS
|
||||
}
|
||||
}
|
||||
}
|
||||
return new Pair(readsByBase, offsetsByBase);
|
||||
}
|
||||
|
||||
public static Pair<List<SAMRecord>, List<Integer>> thresholdReadsByQuality(List<SAMRecord> reads, List<Integer> offsets, byte qThresh) {
|
||||
List<SAMRecord> threshReads;
|
||||
|
|
@ -134,8 +64,8 @@ public class PoolUtils {
|
|||
threshReads = reads;
|
||||
threshOffsets = offsets;
|
||||
} else {
|
||||
threshReads = new ArrayList();
|
||||
threshOffsets = new ArrayList();
|
||||
threshReads = new ArrayList<SAMRecord>();
|
||||
threshOffsets = new ArrayList<Integer>();
|
||||
|
||||
for (int readNo = 0; readNo < reads.size(); readNo++) {
|
||||
if (reads.get(readNo).getBaseQualities()[offsets.get(readNo)] >= qThresh) {
|
||||
|
|
@ -172,109 +102,5 @@ public class PoolUtils {
|
|||
return new Pair<List<SAMRecord>,List<Integer>>(goodMapReads,goodMapOffsets);
|
||||
}
|
||||
|
||||
public static Pair<List<SAMRecord>,List<Integer>> thresholdReadsByQuality(Pair<List<SAMRecord>,List<Integer>> readPair, byte qThresh) {
|
||||
return thresholdReadsByQuality(readPair.getFirst(),readPair.getSecond(),qThresh);
|
||||
}
|
||||
|
||||
public static int getBaseOffset(char base) {
|
||||
switch (base) {
|
||||
case 'A':
|
||||
case 'a':
|
||||
return getBaseAOffset();
|
||||
case 'C':
|
||||
case 'c':
|
||||
return getBaseCOffset();
|
||||
case 'G':
|
||||
case 'g':
|
||||
return getBaseGOffset();
|
||||
case 'T':
|
||||
case 't':
|
||||
return getBaseTOffset();
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
//TODO: indel offsets
|
||||
}
|
||||
|
||||
public static int getBaseAOffset() {
|
||||
return BASE_A_OFFSET;
|
||||
}
|
||||
|
||||
public static int getBaseCOffset() {
|
||||
return BASE_C_OFFSET;
|
||||
}
|
||||
|
||||
public static int getBaseGOffset() {
|
||||
return BASE_G_OFFSET;
|
||||
}
|
||||
|
||||
public static int getBaseTOffset() {
|
||||
return BASE_T_OFFSET;
|
||||
}
|
||||
|
||||
public static List<Byte> getReadBaseQualities(List<SAMRecord> reads, List<Integer> offsets) {
|
||||
List<Byte> qualities = new ArrayList<Byte>(reads.size());
|
||||
for (int readNo = 0; readNo < reads.size(); readNo++) {
|
||||
qualities.add(reads.get(readNo).getBaseQualities()[offsets.get(readNo)]);
|
||||
}
|
||||
|
||||
return qualities;
|
||||
}
|
||||
|
||||
public static double calculateLogLikelihoodOfSample(ReadOffsetQuad readsBySupport, double alleleFreq) {
|
||||
List<Byte> qSNP = getReadBaseQualities(readsBySupport.getFirstReads(), readsBySupport.getFirstOffsets());
|
||||
List<Byte> qRef = getReadBaseQualities(readsBySupport.getSecondReads(), readsBySupport.getSecondOffsets());
|
||||
Pair<Double,Double> logsumSNP = qListToSumLogProbabilities(true, qSNP, 1/alleleFreq);
|
||||
Pair<Double,Double> logsumRef = qListToSumLogProbabilities(false, qRef, 1/alleleFreq);
|
||||
return 0.0 - logsumSNP.getFirst() - logsumRef.getFirst() + logsumSNP.getSecond() + logsumRef.getSecond();
|
||||
}
|
||||
|
||||
public static double calculateLogLikelihoodOfSample(Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> snpReadsRefReads, int nIndivids) {
|
||||
List<Byte> qListSnps = getReadBaseQualities(snpReadsRefReads.getFirst().getFirst(),snpReadsRefReads.getSecond().getFirst());
|
||||
List<Byte> qListRefs = getReadBaseQualities(snpReadsRefReads.getFirst().getSecond(),snpReadsRefReads.getSecond().getSecond());
|
||||
Pair<Double,Double> logsumSNP = qListToSumLogProbabilities(true,qListSnps, 2.0*nIndivids);
|
||||
Pair<Double,Double> logsumRef = qListToSumLogProbabilities(false,qListRefs, 2.0*nIndivids);
|
||||
|
||||
return 0.0 - logsumSNP.first - logsumRef.first + logsumSNP.second + logsumRef.second;
|
||||
}
|
||||
|
||||
|
||||
public static Pair<Double,Double> qListToSumLogProbabilities(boolean listRepresentsSNPObservations, List<Byte> qList, double denom)
|
||||
{
|
||||
double logProbObserveXAndSNPTrue = 0; // note "error" for SNP is observing a ref
|
||||
double logProbObserveXAndRefTrue = 0;// and "error" for ref is observing a SNP
|
||||
|
||||
for (byte qual : qList) {
|
||||
double p_err = QualityUtils.qualToErrorProb(qual);
|
||||
if (listRepresentsSNPObservations) {
|
||||
logProbObserveXAndSNPTrue += Math.log10((1 - p_err) / denom +((denom - 1)*p_err) / denom);
|
||||
logProbObserveXAndRefTrue += Math.log10(p_err);
|
||||
} else {
|
||||
logProbObserveXAndSNPTrue += Math.log10((denom - 1) * (1 - p_err)/denom + p_err/denom);
|
||||
logProbObserveXAndRefTrue+= Math.log10(1 -p_err);
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<Double,Double>(logProbObserveXAndSNPTrue,logProbObserveXAndRefTrue);
|
||||
}
|
||||
|
||||
public static ReadOffsetQuad coinTossPartition(List<SAMRecord> reads, List<Integer> offsets, double alleleFreq) {
|
||||
ArrayList<SAMRecord> snpReads = new ArrayList<SAMRecord>();
|
||||
ArrayList<Integer> snpOffsets = new ArrayList<Integer>();
|
||||
ArrayList<SAMRecord> refReads = new ArrayList<SAMRecord>();
|
||||
ArrayList<Integer> refOffsets = new ArrayList<Integer>();
|
||||
|
||||
for( int i = 0; i < reads.size(); i ++ ) {
|
||||
if ( Math.random() < alleleFreq ) {
|
||||
snpReads.add(reads.get(i));
|
||||
snpOffsets.add(offsets.get(i));
|
||||
} else {
|
||||
refReads.add(reads.get(i));
|
||||
refOffsets.add(offsets.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
return new ReadOffsetQuad(snpReads,snpOffsets,refReads,refOffsets);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@ public class ReadBackedPileup extends BasicPileup {
|
|||
this.offsets = offsets;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Number of pileup elements; reads and offsets are parallel lists of this length. */
public int size() { return reads.size(); }
|
||||
/** The pileup's reads. NOTE(review): exposes the internal list (no defensive copy) — callers must not mutate it. */
public List<SAMRecord> getReads() { return reads; }
|
||||
/** Per-read offsets, parallel to getReads(). NOTE(review): exposes the internal list (no defensive copy). */
public List<Integer> getOffsets() { return offsets; }
|
||||
|
|
|
|||
|
|
@ -13,8 +13,14 @@ import java.util.Arrays;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class BaseTransitionTableCalculatorJavaIntegrationTest extends WalkerTest{
|
||||
|
||||
public static final String OUTPUT_MD5 = "856a3124fb078b4d820f935369ec53cc";
|
||||
// MD5s last computed 10/20 at revision 1877
|
||||
public static final String OUTPUT_MD5_STANDARD = "856a3124fb078b4d820f935369ec53cc";
|
||||
public static final String OUTPUT_MD5_3MISMATCHES = "847375e3d40415d237c627a56d163760";
|
||||
public static final String OUTPUT_MD5_LOWMAPPINGQUALITY = "d7e490168dfef8eb4185b596f55bfaa8";
|
||||
public static final String OUTPUT_MD5_LOWQUALITYSCORE = "e3f3c3875936b0df7a8899c2ef5d1dd2";
|
||||
public static final String OUTPUT_MD5_LOWCONFIDENTREFTHRESHOLD = "521a9fa7716ed22550c2ba3fe3409070";
|
||||
public static final String OUTPUT_MD5_HIGHCONFIDENTREFTHRESHOLD = "8ab6d389fc494881736e9a58126c2f1b";
|
||||
public static final String OUTPUT_MD5_ALLARGUMENTS = "f45481946d7a5c70078d432b0baff083";
|
||||
public static final String LOCUS = "1:10,000,000-10,200,000";
|
||||
public static final String BAM_FILE = "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam";
|
||||
public static final String REFERENCE = "/broad/1KG/reference/human_b36_both.fasta";
|
||||
|
|
@ -22,7 +28,49 @@ public class BaseTransitionTableCalculatorJavaIntegrationTest extends WalkerTest
|
|||
@Test
|
||||
public void testBaseTransitionTableCalculatorJava() {
|
||||
String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE;
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5));
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_STANDARD));
|
||||
executeTest("testBaseTransitionTableCalculatorJava", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBaseTransitionTableCalculatorJavaAdditionalMismatches() {
|
||||
String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+" --maxNumMismatches 3";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_3MISMATCHES));
|
||||
executeTest("BaseTransitionTableCalculatorJava: Additional Mismatches",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBaseTransitionCalculatorJavaLowMapingQuality() {
|
||||
String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --minMappingQuality 5";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_LOWMAPPINGQUALITY));
|
||||
executeTest("BaseTransitionTableCalculatorJava: Low Mapping Quality",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBaseTransitionCalculatorJavaLowQualityScore() {
|
||||
String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --minQualityScore 5";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_LOWQUALITYSCORE));
|
||||
executeTest("BaseTransitionTableCalculatorJava: Low Quality Score",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBaseTransitionCalculatorJavaLowConfidentRefThreshold() {
|
||||
String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --confidentRefThreshold 2";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_LOWCONFIDENTREFTHRESHOLD));
|
||||
executeTest("BaseTransitionTableCalculatorJava: Low Ref Threshold",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBaseTransitionCalculatorJavaHighConfidentRefThreshold() {
|
||||
String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --confidentRefThreshold 8";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_HIGHCONFIDENTREFTHRESHOLD));
|
||||
executeTest("BaseTransitionTableCalculatorJava: Low Ref Threshold",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBaseTransitionCalculatorJavaAllArguments() {
|
||||
String args = "-T BaseTransitionTableCalculatorJava -o %s -I "+BAM_FILE+" -L "+LOCUS+" -R "+REFERENCE+ " --confidentRefThreshold 2 --minQualityScore 5 --minMappingQuality 5 --maxNumMismatches 3";
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(args,1,Arrays.asList(OUTPUT_MD5_ALLARGUMENTS));
|
||||
executeTest("BaseTransitionTableCalculatorJava: Low Ref Threshold, Low Q Score, Low Mapping Quality, Additional Mismatches",spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,8 +41,9 @@ class TransitionTable() {
|
|||
}
|
||||
|
||||
/**
 * Builds the tab-separated column header for the table: one "[observed|true]" label per
 * (true base, observed base) cell, in the same iteration order as the value rows.
 * (Removed the commented-out previous "P(%c|true=%c)" formatting line — dead code.)
 */
def header(): String = {
    return Utils.join("\t", mapEntries((i,j,x) => "[%c|%c]" format (fromKey(j), fromKey(i))).toArray)
}
|
||||
|
||||
override def toString(): String = {
|
||||
return Utils.join("\t", mapEntries((i,j,x) => "%.2f" format x).toArray)
|
||||
|
|
@ -106,7 +107,10 @@ class BaseTransitionTableCalculator extends LocusWalker[Unit,Int] {
|
|||
|
||||
nonRefBases.length match {
|
||||
case 1 =>
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table1)
|
||||
//if ( goodRead(nonRefRead,offsets.head) ) {
|
||||
//printf("Including site:%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", nonRefBases, refBases, nonRefRead.format, offsets.head)
|
||||
//}
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table1)
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, if ( nonRefRead.getReadNegativeStrandFlag ) tableREV else tableFWD )
|
||||
case 2 =>
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table2)
|
||||
|
|
@ -128,7 +132,7 @@ class BaseTransitionTableCalculator extends LocusWalker[Unit,Int] {
|
|||
|
||||
def addRead(nonRefRead: SAMRecord, offset: Integer, nonRefBase: Char, ref: ReferenceContext, table: TransitionTable): Unit = {
|
||||
if ( goodRead(nonRefRead, offset) ) {
|
||||
//printf("Including site:%n call=%s%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", call, nonRefBases, refBases, nonRefRead.format, offsets.head)
|
||||
//printf("Including site:%n call=%s%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", nonRefBases, refBases, nonRefRead.format, offsets.head)
|
||||
//println(call.getReadDepth, call.getReferencebase, nonRefBases, refBases, nonRefRead.getMappingQuality)
|
||||
if ( CARE_ABOUT_STRAND && nonRefRead.getReadNegativeStrandFlag() ) {
|
||||
// it's on the reverse strand
|
||||
|
|
@ -180,7 +184,7 @@ class BaseTransitionTableCalculator extends LocusWalker[Unit,Int] {
|
|||
if ( call == null )
|
||||
return false;
|
||||
|
||||
return ! call.getFirst().get(0).isVariant() && call.getFirst().get(0).getNegLog10PError() > MIN_LOD
|
||||
return (! call.getFirst().get(0).isVariant()) && call.getFirst().get(0).getNegLog10PError() > MIN_LOD
|
||||
}
|
||||
|
||||
def reduceInit(): Int = {
|
||||
|
|
|
|||
Loading…
Reference in New Issue