More sophisticated version of the NQSCovariantWalker - modified to be more explicit about how much higher the

quality score of a particular base is than the quality score of its neighbors. The granularity of the binning
jumps from 32 groups to 860 groups.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1702 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-09-23 19:18:24 +00:00
parent 856bbd0320
commit 8cdb78ebee
1 changed files with 154 additions and 0 deletions

View File

@ -0,0 +1,154 @@
package org.broadinstitute.sting.playground.gatk.walkers.Recalibration;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.QualityUtils;
import java.util.List;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Sep 23, 2009
* Time: 11:26:52 AM
* To change this template use File | Settings | File Templates.
*/
public class NQSExtendedGroupsCovariantWalker extends LocusWalker<LocalMapType, long[][][]> {
final int NEIGHBORHOOD_SIZE = 5;
final int MM_OFFSET = 1;
final int COUNT_OFFSET = 0;
final int MAX_QSCORE = QualityUtils.MAX_REASONABLE_Q_SCORE;
int NQS_GROUPS;
public static final String DATA_FORMAT = "%d\t%d\t%d\t%d\t%f%n";
public static final String TEXT_FORMAT = "%s\t%s\t%s\t%s\t%s%n";
// Walker methods
public void initialize() {
NQS_GROUPS = (MAX_QSCORE)*(MAX_QSCORE+1)/2 + MAX_QSCORE;
// this is from solving the recurrent equation
// a_{n+1} = a_n + n + 1
}
public long[][][] reduceInit() {
long[][][] covariantCounts = new long[NQS_GROUPS][2][MAX_QSCORE];
for ( int i = 0; i < NQS_GROUPS; i ++ ) {
for ( int j = 0; j < 2; j++ ) {
for ( int k = 0; k < MAX_QSCORE; k ++) {
covariantCounts[i][j][k] = 0;
}
}
}
return covariantCounts;
}
public LocalMapType map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return new LocalMapType(context, ref, tracker);
}
public long[][][] reduce( LocalMapType map, long[][][] cumulativeBins ) {
for ( int i = 0; i < map.numReads(); i ++ ) {
int groupNumber = calcGroupNumber(i, map.context);
int qualityAsInt = getBaseQualityAsInt(map.context,i);
cumulativeBins[groupNumber][COUNT_OFFSET][qualityAsInt] ++;
if ( ! isRef(i, map.context, map.ref ) ) {
cumulativeBins[groupNumber][MM_OFFSET][qualityAsInt] ++;
}
}
return cumulativeBins;
}
public void onTraversalDone(long[][][] cumulativeBins) {
printNQSMismatchTable(cumulativeBins);
}
// Map-Reduce helper methods
public int calcGroupNumber( int read, AlignmentContext context ) {
return calcGroupNumberFromQualities(getBaseQualityAsInt(context, read), getMinNeighborhoodBaseQualityAsInt(context, read));
}
/*
* calculates group number as
* #: nghd left base nghd right
* 0: 000 0 000
* 1: 000 1 000
* 2: 111 1 111
* 3: 000 2 000
* 4: 111 2 111
* 5: 222 2 222
* 6: 000 3 000
* 7: 111 3 111
* etc
*/
public int calcGroupNumberFromQualities( int baseQuality, int minNeighborhoodQuality ) {
int groupNumber = (baseQuality)*(baseQuality+1)/2 + baseQuality;
if ( minNeighborhoodQuality <= baseQuality ) {
groupNumber += minNeighborhoodQuality;
}
return groupNumber;
}
public int getBaseQualityAsInt( AlignmentContext context, int read ) {
return (int) context.getReads().get(read).getBaseQualities()[context.getOffsets().get(read)];
}
public int getMinNeighborhoodBaseQualityAsInt( AlignmentContext context, int read ) {
byte[] qualities = context.getReads().get(read).getBaseQualities();
int offset = context.getOffsets().get(read);
int readLength = qualities.length;
int start = (offset - NEIGHBORHOOD_SIZE < readLength) ? 0 : offset - NEIGHBORHOOD_SIZE;
int end = (offset + NEIGHBORHOOD_SIZE > readLength) ? readLength : offset+NEIGHBORHOOD_SIZE;
byte minQuality = Byte.MAX_VALUE;
for ( int i = start; i < end; i ++ ) {
if ( i != offset ) {
if ( qualities[i] < minQuality ) {
minQuality = qualities[i];
}
}
}
return (int) minQuality;
}
public boolean isRef(int read, AlignmentContext context, ReferenceContext ref ) {
return (context.getReads().get(read).getReadBases()[context.getOffsets().get(read)] == ref.getBase());
}
// printing helper methods
public void printNQSMismatchTable(long[][][] cumulativeBins) {
out.print(createHeader());
for ( int qscore = 0; qscore < MAX_QSCORE; qscore ++ ) {
for ( int group = 0; group < NQS_GROUPS; group ++ ) {
out.print(formatNQSMismatchCountString(qscore,group,cumulativeBins));
}
}
}
public String createHeader() {
return String.format(TEXT_FORMAT, "Qscore_Reported", "NQS_Group", "Mismatches", "Total", "Mismatch_Rate");
}
public String formatNQSMismatchCountString(int qscore, int group, long[][][] cumulativeBins) {
long mm = cumulativeBins[group][MM_OFFSET][qscore];
long ct = cumulativeBins[group][COUNT_OFFSET][qscore];
double mmr = mm/ct;
return String.format(DATA_FORMAT, qscore, group, mm, ct, mmr);
}
}