Another temp checking for rearranging things
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1048 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3c40db260d
commit
ca8a3bd85e
|
|
@ -55,7 +55,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
|||
@Argument(fullName="collapseDinuc", shortName="collapseDinuc", required=false, doc="")
|
||||
public boolean collapseDinuc = false;
|
||||
|
||||
int NDINUCS = 16;
|
||||
//ArrayList<RecalData> flattenData = new ArrayList<RecalData>();
|
||||
//HashMap<String, RecalData[][][]> data = new HashMap<String, RecalData[][][]>();
|
||||
HashMap<String, RecalDataManager> data = new HashMap<String, RecalDataManager>();
|
||||
|
|
@ -76,7 +75,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
|||
if( !isSupportedReadGroup(readGroup) )
|
||||
continue;
|
||||
String rg = readGroup.getReadGroupId();
|
||||
RecalDataManager manager = new RecalDataManager(rg, maxReadLen, QualityUtils.MAX_QUAL_SCORE+1, NDINUCS, ! collapsePos, ! collapseDinuc );
|
||||
RecalDataManager manager = new RecalDataManager(rg, maxReadLen, QualityUtils.MAX_QUAL_SCORE+1, RecalData.NDINUCS, ! collapsePos, ! collapseDinuc );
|
||||
data.put(rg, manager);
|
||||
}
|
||||
}
|
||||
|
|
@ -142,7 +141,11 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
|||
int qual = quals[offset];
|
||||
if ( qual > 0 && qual <= QualityUtils.MAX_QUAL_SCORE ) {
|
||||
// previous base is the next base in terms of machine chemistry if this is a negative strand
|
||||
//System.out.printf("Adding b_offset=%c offset=%d cycle=%d qual=%d dinuc=%c%c ref_match=%c comp=%c%n", (char)read.getReadBases()[offset], offset, cycle, qual, prevBase, base, ref, (char)BaseUtils.simpleComplement(ref));
|
||||
// if ( qual == 2 )
|
||||
//if ( read.getReadName().equals("30PTAAAXX090126:5:14:132:764#0") )
|
||||
// System.out.printf("Adding neg?=%b b_offset=%c offset=%d cycle=%d qual=%d dinuc=%c%c ref_match=%c comp=%c name=%s%n",
|
||||
// read.getReadNegativeStrandFlag(), (char)read.getReadBases()[offset], offset, cycle, qual, prevBase, base,
|
||||
// ref, (char)BaseUtils.simpleComplement(ref), read.getReadName());
|
||||
RecalData datum = getRecalData(rg, cycle, qual, prevBase, base);
|
||||
if (datum != null) datum.inc(base,ref);
|
||||
return 1;
|
||||
|
|
@ -174,7 +177,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
|||
if (CREATE_TRAINING_DATA) writeTrainingData();
|
||||
}
|
||||
|
||||
void printInfo(PrintStream out) {
|
||||
private void printInfo(PrintStream out) {
|
||||
out.printf("# date %s%n", new Date());
|
||||
out.printf("# collapsed_pos %b%n", collapsePos);
|
||||
out.printf("# collapsed_dinuc %b%n", collapseDinuc);
|
||||
|
|
@ -185,16 +188,16 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
|
||||
void writeTrainingData() {
|
||||
private void writeTrainingData() {
|
||||
PrintStream dinuc_out = null;
|
||||
PrintStream table_out = null;
|
||||
try {
|
||||
dinuc_out = new PrintStream( OUTPUT_FILEROOT+".covariate_counts.csv");
|
||||
dinuc_out.println("rg,dn,logitQ,pos,indicator,count");
|
||||
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
|
||||
for ( int dinuc_index=0; dinuc_index<NDINUCS; dinuc_index++) {
|
||||
for ( int dinuc_index=0; dinuc_index<RecalData.NDINUCS; dinuc_index++) {
|
||||
for ( RecalData datum: getRecalData(readGroup.getReadGroupId()) ) {
|
||||
if ( RecalData.string2dinucIndex(datum.dinuc) == dinuc_index ) {
|
||||
if ( RecalData.dinucIndex(datum.dinuc) == dinuc_index ) {
|
||||
if ((datum.N - datum.B) > 0)
|
||||
dinuc_out.format("%s,%s,%d,%d,%d,%d%n", readGroup.getReadGroupId(), RecalData.dinucIndex2bases(dinuc_index), datum.qual, datum.pos, 0, datum.N - datum.B);
|
||||
if (datum.B > 0)
|
||||
|
|
@ -207,7 +210,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
|||
if ( outputRawData ) {
|
||||
table_out = new PrintStream( OUTPUT_FILEROOT+".raw_data.csv");
|
||||
printInfo(table_out);
|
||||
table_out.println("rg,dn,Qrep,pos,NBases,MMismatches");
|
||||
table_out.println("rg,dn,Qrep,pos,NBases,MMismatches,Qemp");
|
||||
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
|
||||
for ( RecalData datum: getRecalData(readGroup.getReadGroupId()) ) {
|
||||
if ( datum.N > 0 )
|
||||
|
|
@ -295,20 +298,20 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
|||
ByDinucFile.printf("dinuc,Qemp-obs,Qemp,Qobs,B,N%n");
|
||||
RecalData All = new RecalData(0,0,readGroup.getReadGroupId(),"");
|
||||
MeanReportedQuality AllReported = new MeanReportedQuality();
|
||||
for (int c=0; c < NDINUCS; c++) {
|
||||
for (int c=0; c < RecalData.NDINUCS; c++) {
|
||||
ByCycle.add(new RecalData(-1, -1,readGroup.getReadGroupId(),RecalData.dinucIndex2bases(c)));
|
||||
ByCycleReportedQ.add(new MeanReportedQuality());
|
||||
}
|
||||
|
||||
for ( RecalData datum: getRecalData(readGroup.getReadGroupId()) ) {
|
||||
int dinucIndex = RecalData.string2dinucIndex(datum.dinuc); //bases2dinucIndex(datum.dinuc.charAt(0), datum.dinuc.charAt(1), false);
|
||||
int dinucIndex = RecalData.dinucIndex(datum.dinuc); //bases2dinucIndex(datum.dinuc.charAt(0), datum.dinuc.charAt(1), false);
|
||||
ByCycle.get(dinucIndex).inc(datum.N, datum.B);
|
||||
ByCycleReportedQ.get(dinucIndex).inc(datum.qual, datum.N);
|
||||
All.inc(datum.N, datum.B);
|
||||
AllReported.inc(datum.qual, datum.N);
|
||||
}
|
||||
|
||||
for (int c=0; c < NDINUCS; c++) {
|
||||
for (int c=0; c < RecalData.NDINUCS; c++) {
|
||||
double empiricalQual = -10 * Math.log10((double)ByCycle.get(c).B / ByCycle.get(c).N);
|
||||
double reportedQual = ByCycleReportedQ.get(c).result();
|
||||
ByDinucFile.printf("%s, %f, %f, %f, %d, %d%n", ByCycle.get(c).dinuc, empiricalQual-reportedQual, empiricalQual, reportedQual, ByCycle.get(c).B, ByCycle.get(c).N);
|
||||
|
|
@ -347,7 +350,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
|||
|
||||
for (int q=0; q<QualityUtils.MAX_QUAL_SCORE; q++) {
|
||||
double empiricalQual = -10 * Math.log10((double)ByQ.get(q).B / ByQ.get(q).N);
|
||||
ByQualFile.printf("%d, %f, %.0f, %d, %d%n", q, empiricalQual, ByQReportedQ.get(q).result(), ByQ.get(q).B, ByQ.get(q).N);
|
||||
ByQualFile.printf("%3d, %2.2f, %2.2f, %12d, %12d%n", q, empiricalQual, ByQReportedQ.get(q).result(), ByQ.get(q).B, ByQ.get(q).N);
|
||||
//out.printf("%3d,%s,%3d,%5.1f,%5.1f,%6d,%6d", pos, dinuc, qual, empiricalQual, qual-empiricalQual, N, B); n
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.recalibration;
|
|||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.apache.log4j.Logger;
|
||||
|
|
@ -12,6 +14,7 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
|
||||
@WalkerName("LogisticRecalibration")
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||
@Argument(shortName="logisticParams", doc="logistic params file", required=true)
|
||||
public String logisticParamsFile;
|
||||
|
|
@ -185,7 +188,7 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
|
|||
}
|
||||
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
recalQuals = BaseUtils.reverse(quals);
|
||||
recalQuals = BaseUtils.reverse(recalQuals);
|
||||
//System.out.printf("OLD: %s%n", read.format());
|
||||
read.setBaseQualities(recalQuals);
|
||||
//System.out.printf("NEW: %s%n", read.format());
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.recalibration;
|
||||
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
public class RecalData {
|
||||
|
|
@ -53,7 +52,7 @@ public class RecalData {
|
|||
}
|
||||
|
||||
public String toCSVString(boolean collapsedPos) {
|
||||
return String.format("%s,%s,%d,%s,%d,%d", readGroup, dinuc, qual, collapsedPos ? "*" : pos, N, B);
|
||||
return String.format("%s,%s,%d,%s,%d,%d,%d", readGroup, dinuc, qual, collapsedPos ? "*" : pos, N, B, empiricalQualByte());
|
||||
}
|
||||
|
||||
public static RecalData fromCSVString(String s) {
|
||||
|
|
@ -74,25 +73,26 @@ public class RecalData {
|
|||
return datum;
|
||||
}
|
||||
|
||||
public static int bases2dinucIndex(char prevBase, char base, boolean Complement) {
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(prevBase) == -1 || BaseUtils.simpleBaseToBaseIndex(base) == -1 )
|
||||
return -1;
|
||||
|
||||
if (!Complement) {
|
||||
return BaseUtils.simpleBaseToBaseIndex(prevBase) * 4 + BaseUtils.simpleBaseToBaseIndex(base);
|
||||
}else {
|
||||
return (3 - BaseUtils.simpleBaseToBaseIndex(prevBase)) * 4 + (3 - BaseUtils.simpleBaseToBaseIndex(base));
|
||||
}
|
||||
public static int bases2dinucIndex(char prevBase, char base) {
|
||||
int pbI = BaseUtils.simpleBaseToBaseIndex(prevBase);
|
||||
int bI = BaseUtils.simpleBaseToBaseIndex(base);
|
||||
return (pbI == -1 || bI == -1) ? -1 : pbI * 4 + bI;
|
||||
}
|
||||
|
||||
public final static int NDINUCS = 16;
|
||||
public static String dinucIndex2bases(int index) {
|
||||
char data[] = {BaseUtils.baseIndexToSimpleBase(index / 4), BaseUtils.baseIndexToSimpleBase(index % 4)};
|
||||
return new String( data );
|
||||
}
|
||||
|
||||
public static int string2dinucIndex(String s) {
|
||||
return bases2dinucIndex(s.charAt(0), s.charAt(1), false);
|
||||
public static int dinucIndex(String s) {
|
||||
return bases2dinucIndex(s.charAt(0), s.charAt(1));
|
||||
}
|
||||
|
||||
public static int dinucIndex(byte prevBase, byte base) {
|
||||
return bases2dinucIndex((char)prevBase, (char)base);
|
||||
}
|
||||
|
||||
//
|
||||
// private static int nuc2num[];
|
||||
// private static char num2nuc[];
|
||||
|
|
|
|||
|
|
@ -33,12 +33,21 @@ public class RecalDataManager {
|
|||
return trackPos ? pos : 0;
|
||||
}
|
||||
|
||||
public int canonicalPos(int cycle) {
|
||||
return getPosIndex(cycle);
|
||||
}
|
||||
|
||||
|
||||
public int getDinucIndex(String dinuc) {
|
||||
if ( trackDinuc ) {
|
||||
return RecalData.string2dinucIndex(dinuc);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
return trackDinuc ? RecalData.dinucIndex(dinuc) : 0;
|
||||
}
|
||||
|
||||
public int getDinucIndex(byte prevBase, byte base) {
|
||||
return trackDinuc ? RecalData.dinucIndex(prevBase, base) : 0;
|
||||
}
|
||||
|
||||
public String canonicalDinuc(String dinuc) {
|
||||
return trackDinuc ? dinuc : "**";
|
||||
}
|
||||
|
||||
public void addDatum(RecalData datum) {
|
||||
|
|
|
|||
|
|
@ -3,16 +3,21 @@ package org.broadinstitute.sting.playground.gatk.walkers.recalibration;
|
|||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.recalibration.RecalData;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
@WalkerName("TableRecalibration")
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||
@Argument(shortName="params", doc="CountCovariates params file", required=true)
|
||||
public String paramsFile;
|
||||
|
|
@ -27,26 +32,52 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
// maps from [readGroup] -> [prevBase x base -> [cycle, qual, new qual]]
|
||||
HashMap<String, RecalMapping> cache = new HashMap<String, RecalMapping>();
|
||||
|
||||
@Argument(shortName="serial", doc="", required=false)
|
||||
//@Argument(shortName="serial", doc="", required=false)
|
||||
boolean serialRecalibration = false;
|
||||
|
||||
private static Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||
private static Pattern COLLAPSED_POS_PATTERN = Pattern.compile("^#\\s+collapsed_pos\\s+(\\w+)");
|
||||
private static Pattern COLLAPSED_DINUC_PATTERN = Pattern.compile("^#\\s+collapsed_dinuc\\s+(\\w+)");
|
||||
private static Pattern HEADER_PATTERN = Pattern.compile("^rg.*");
|
||||
|
||||
public void initialize() {
|
||||
try {
|
||||
System.out.printf("Reading data...%n");
|
||||
List<RecalData> data = new ArrayList<RecalData>();
|
||||
boolean collapsedPos = false;
|
||||
boolean collapsedDinuc = false;
|
||||
|
||||
List<String> lines = new xReadLines(new File(paramsFile)).readLines();
|
||||
for ( String line : lines ) {
|
||||
// rg,dn,logitQ,pos,indicator,count
|
||||
// SRR007069,AA,28,1,0,2
|
||||
data.add(RecalData.fromCSVString(line));
|
||||
//System.out.printf("Reading line %s%n", line);
|
||||
if ( HEADER_PATTERN.matcher(line).matches() )
|
||||
continue;
|
||||
if ( COMMENT_PATTERN.matcher(line).matches() ) {
|
||||
collapsedPos = parseCommentLine(COLLAPSED_POS_PATTERN, line, collapsedPos);
|
||||
collapsedDinuc = parseCommentLine(COLLAPSED_DINUC_PATTERN, line, collapsedDinuc);
|
||||
//System.out.printf("Collapsed %b %b%n", collapsedPos, collapsedDinuc);
|
||||
}
|
||||
else {
|
||||
data.add(RecalData.fromCSVString(line));
|
||||
}
|
||||
}
|
||||
initializeCache(data);
|
||||
initializeCache(data, collapsedPos, collapsedDinuc);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
Utils.scareUser("Cannot read/find parameters file " + paramsFile);
|
||||
}
|
||||
}
|
||||
|
||||
private void initializeCache(List<RecalData> data) {
|
||||
private boolean parseCommentLine(Pattern pat, String line, boolean flag) {
|
||||
Matcher m = pat.matcher(line);
|
||||
if ( m.matches() ) {
|
||||
//System.out.printf("Parsing %s%n", m.group(1));
|
||||
flag = Boolean.parseBoolean(m.group(1));
|
||||
}
|
||||
|
||||
return flag;
|
||||
}
|
||||
|
||||
private void initializeCache(List<RecalData> data, boolean collapsedPos, boolean collapsedDinuc ) {
|
||||
Set<String> readGroups = new HashSet<String>();
|
||||
Set<String> dinucs = new HashSet<String>();
|
||||
int maxPos = -1;
|
||||
|
|
@ -68,7 +99,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
// initialize the data structure
|
||||
HashMap<String, RecalDataManager> managers = new HashMap<String, RecalDataManager>();
|
||||
for ( String readGroup : readGroups ) {
|
||||
RecalDataManager manager = new RecalDataManager(readGroup, maxPos, maxQReported, dinucs.size(), true, true);
|
||||
RecalDataManager manager = new RecalDataManager(readGroup, maxPos, maxQReported, dinucs.size(), ! collapsedPos, ! collapsedDinuc);
|
||||
managers.put(readGroup, manager);
|
||||
}
|
||||
|
||||
|
|
@ -90,13 +121,8 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
}
|
||||
|
||||
public SAMRecord map(char[] ref, SAMRecord read) {
|
||||
//if ( read.getReadLength() > maxReadLen ) {
|
||||
// throw new RuntimeException("Expectedly long read, please increase maxium read len with maxReadLen parameter: " + read.format());
|
||||
//}
|
||||
|
||||
byte[] bases = read.getReadBases();
|
||||
byte[] quals = read.getBaseQualities();
|
||||
byte[] recalQuals = new byte[quals.length];
|
||||
|
||||
// Since we want machine direction reads not corrected positive strand reads, rev comp any negative strand reads
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
|
|
@ -104,28 +130,34 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
quals = BaseUtils.reverse(quals);
|
||||
}
|
||||
|
||||
String readGroup = read.getAttribute("RG").toString();
|
||||
byte[] recalQuals = recalibrateBasesAndQuals(read.getAttribute("RG").toString(), bases, quals);
|
||||
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
recalQuals = BaseUtils.reverse(recalQuals);
|
||||
//if ( read.getReadName().equals("30PTAAAXX090126:5:14:132:764#0") )
|
||||
// System.out.printf("OLD: %s%n", read.format());
|
||||
read.setBaseQualities(recalQuals);
|
||||
//if ( read.getReadName().equals("30PTAAAXX090126:5:14:132:764#0") )
|
||||
// System.out.printf("NEW: %s%n", read.format());
|
||||
return read;
|
||||
}
|
||||
|
||||
public byte[] recalibrateBasesAndQuals(final String readGroup, byte[] bases, byte[] quals) {
|
||||
byte[] recalQuals = new byte[quals.length];
|
||||
RecalMapping mapper = cache.get(readGroup);
|
||||
|
||||
int numBases = read.getReadLength();
|
||||
recalQuals[0] = quals[0]; // can't change the first -- no dinuc
|
||||
|
||||
for ( int cycle = 1; cycle < numBases; cycle++ ) { // skip first and last base, qual already set because no dinuc
|
||||
// Take into account that previous base is the next base in terms of machine chemistry if
|
||||
// this is a negative strand
|
||||
recalQuals[0] = quals[0]; // can't change the first -- no dinuc
|
||||
for ( int cycle = 1; cycle < bases.length; cycle++ ) { // skip first and last base, qual already set because no dinuc
|
||||
byte qual = quals[cycle];
|
||||
byte newQual = mapper.getNewQual(readGroup, bases[cycle - 1], bases[cycle], cycle, qual);
|
||||
//if ( read.getReadName().equals("30PTAAAXX090126:5:14:132:764#0") )
|
||||
// System.out.printf("Processing cycle=%d qual=%d: neg?=%b => %d at %s%n",
|
||||
// cycle, qual, read.getReadNegativeStrandFlag(), newQual, read.getReadName());
|
||||
recalQuals[cycle] = newQual;
|
||||
//System.out.printf("Mapping %d => %d%n", qual, newQual);
|
||||
}
|
||||
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
recalQuals = BaseUtils.reverse(quals);
|
||||
//System.out.printf("OLD: %s%n", read.format());
|
||||
read.setBaseQualities(recalQuals);
|
||||
//System.out.printf("NEW: %s%n", read.format());
|
||||
return read;
|
||||
return recalQuals;
|
||||
}
|
||||
|
||||
public void onTraversalDone(SAMFileWriter output) {
|
||||
|
|
@ -164,9 +196,58 @@ interface RecalMapping {
|
|||
}
|
||||
|
||||
class CombinatorialRecalMapping implements RecalMapping {
|
||||
HashMap<String, byte[][]> cache = new HashMap<String, byte[][]>();
|
||||
ArrayList<byte[][]> cache;
|
||||
RecalDataManager manager;
|
||||
|
||||
public CombinatorialRecalMapping(RecalDataManager manager, Set<String> dinucs, int maxPos, int maxQReported ) {
|
||||
this.manager = manager;
|
||||
|
||||
// initialize the data structure
|
||||
cache = new ArrayList<byte[][]>(RecalData.NDINUCS);
|
||||
for ( String dinuc : dinucs ) {
|
||||
cache.add(new byte[maxPos+1][maxQReported+1]);
|
||||
}
|
||||
|
||||
for ( RecalData datum : manager.getAll() ) {
|
||||
//System.out.printf("Adding datum %s%n", datum);
|
||||
byte [][] table = cache.get(this.manager.getDinucIndex(datum.dinuc));
|
||||
if ( table[datum.pos][datum.qual] != 0 )
|
||||
throw new RuntimeException(String.format("Duplicate entry discovered: %s", datum));
|
||||
//table[datum.pos][datum.qual] = (byte)(1 + datum.empiricalQualByte());
|
||||
table[datum.pos][datum.qual] = datum.empiricalQualByte();
|
||||
// System.out.printf("Binding %d %d => %d%n", datum.pos, datum.qual, datum.empiricalQualByte());
|
||||
}
|
||||
}
|
||||
|
||||
public byte getNewQual(final String readGroup, byte prevBase, byte base, int cycle, byte qual) {
|
||||
//String dinuc = String.format("%c%c", (char)prevBase, (char)base);
|
||||
//if ( qual == 2 )
|
||||
// System.out.printf("Qual = 2%n");
|
||||
|
||||
int pos = manager.canonicalPos(cycle);
|
||||
int index = this.manager.getDinucIndex(prevBase, base);
|
||||
byte[][] dataTable = index == -1 ? null : cache.get(index);
|
||||
|
||||
if ( dataTable == null && prevBase != 'N' && base != 'N' )
|
||||
throw new RuntimeException(String.format("Unmapped data table at %s %c%c", readGroup, prevBase, base));
|
||||
|
||||
byte result = dataTable != null && pos < dataTable.length ? dataTable[pos][qual] : qual;
|
||||
|
||||
//if ( result == 2 )
|
||||
// System.out.printf("Lookup RG=%s dinuc=%s cycle=%d pos=%d qual=%d datatable=%s / %d => %d%n",
|
||||
// readGroup, dinuc, cycle, pos, qual, dataTable, dataTable.length, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/*class CombinatorialRecalMapping implements RecalMapping {
|
||||
HashMap<String, byte[][]> cache = new HashMap<String, byte[][]>();
|
||||
RecalDataManager manager;
|
||||
|
||||
public CombinatorialRecalMapping(RecalDataManager manager, Set<String> dinucs, int maxPos, int maxQReported ) {
|
||||
this.manager = manager;
|
||||
|
||||
// initialize the data structure
|
||||
for ( String dinuc : dinucs ) {
|
||||
byte[][] table = new byte[maxPos+1][maxQReported+1];
|
||||
|
|
@ -180,22 +261,32 @@ class CombinatorialRecalMapping implements RecalMapping {
|
|||
throw new RuntimeException(String.format("Duplicate entry discovered: %s", datum));
|
||||
//table[datum.pos][datum.qual] = (byte)(1 + datum.empiricalQualByte());
|
||||
table[datum.pos][datum.qual] = datum.empiricalQualByte();
|
||||
// System.out.printf("Binding %d %d => %d%n", datum.pos, datum.qual, datum.empiricalQualByte());
|
||||
}
|
||||
}
|
||||
|
||||
public byte getNewQual(final String readGroup, byte prevBase, byte base, int cycle, byte qual) {
|
||||
//System.out.printf("Lookup RG=%s prevBase=%c base=%c cycle=%d qual=%d%n", readGroup, prevBase, base, cycle, qual);
|
||||
//String dinuc = String.format("%c%c", (char)prevBase, (char)base);
|
||||
//if ( qual == 2 )
|
||||
// System.out.printf("Qual = 2%n");
|
||||
|
||||
byte[] bp = {prevBase, base};
|
||||
String dinuc = new String(bp);
|
||||
String dinuc = manager.canonicalDinuc(new String(bp));
|
||||
int pos = manager.canonicalPos(cycle);
|
||||
byte[][] dataTable = cache.get(dinuc);
|
||||
|
||||
if ( dataTable == null && prevBase != 'N' && base != 'N' )
|
||||
throw new RuntimeException(String.format("Unmapped data table at %s %s", readGroup, dinuc));
|
||||
|
||||
return dataTable != null && cycle < dataTable.length ? dataTable[cycle][qual] : qual;
|
||||
byte result = dataTable != null && pos < dataTable.length ? dataTable[pos][qual] : qual;
|
||||
|
||||
//if ( result == 2 )
|
||||
// System.out.printf("Lookup RG=%s dinuc=%s cycle=%d pos=%d qual=%d datatable=%s / %d => %d%n",
|
||||
// readGroup, dinuc, cycle, pos, qual, dataTable, dataTable.length, result);
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
class SerialRecalMapping implements RecalMapping {
|
||||
// mapping from dinuc x Q => new Q
|
||||
|
|
|
|||
Loading…
Reference in New Issue