Updated CovariateCounterWalker to be read group aware
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@889 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7755476d36
commit
dfe464cd81
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
|
|
@ -12,6 +13,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
|
|
||||||
|
|
@ -39,9 +41,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
public String READ_GROUP = "none";
|
public String READ_GROUP = "none";
|
||||||
|
|
||||||
int NDINUCS = 16;
|
int NDINUCS = 16;
|
||||||
RecalData[][][] data = new RecalData[MAX_READ_LENGTH+1][MAX_QUAL_SCORE+1][NDINUCS];
|
|
||||||
//RecalData[][][] data = new RecalData;
|
|
||||||
ArrayList<RecalData> flattenData = new ArrayList<RecalData>();
|
ArrayList<RecalData> flattenData = new ArrayList<RecalData>();
|
||||||
|
HashMap<String, RecalData[][][]> data = new HashMap<String, RecalData[][][]>();
|
||||||
|
//RecalData[][][] data;
|
||||||
|
|
||||||
static int nuc2num[];
|
static int nuc2num[];
|
||||||
static char num2nuc[];
|
static char num2nuc[];
|
||||||
|
|
@ -87,18 +89,19 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
|
||||||
|
data.put(readGroup.getReadGroupId(), new RecalData[MAX_READ_LENGTH+1][MAX_QUAL_SCORE+1][NDINUCS]);
|
||||||
for ( int i = 0; i < MAX_READ_LENGTH+1; i++) {
|
for ( int i = 0; i < MAX_READ_LENGTH+1; i++) {
|
||||||
for ( int j = 0; j < MAX_QUAL_SCORE+1; j++) {
|
for ( int j = 0; j < MAX_QUAL_SCORE+1; j++) {
|
||||||
for ( int k = 0; k < NDINUCS; k++) {
|
for ( int k = 0; k < NDINUCS; k++) {
|
||||||
String dinuc = dinucIndex2bases(k);
|
String dinuc = dinucIndex2bases(k);
|
||||||
RecalData datum = new RecalData(i, j, dinuc);
|
RecalData datum = new RecalData(i, j, dinuc);
|
||||||
data[i][j][k] = datum;
|
data.get(readGroup.getReadGroupId())[i][j][k] = datum;
|
||||||
flattenData.add(datum);
|
flattenData.add(datum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public Integer map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
|
|
@ -108,8 +111,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
List<Integer> offsets = context.getOffsets();
|
List<Integer> offsets = context.getOffsets();
|
||||||
for (int i =0; i < reads.size(); i++ ) {
|
for (int i =0; i < reads.size(); i++ ) {
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
|
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup((String)read.getAttribute("RG"));
|
||||||
if (//read.getHeader().getReadGroup((String)read.getAttribute("RG")).getAttribute("PL") == "ILLUMINA" &&
|
if ( readGroup.getAttribute("PL") == "ILLUMINA" &&
|
||||||
!read.getReadNegativeStrandFlag() &&
|
!read.getReadNegativeStrandFlag() &&
|
||||||
(READ_GROUP.equals("none") || read.getAttribute("RG") != null && read.getAttribute("RG").equals(READ_GROUP)) &&
|
(READ_GROUP.equals("none") || read.getAttribute("RG") != null && read.getAttribute("RG").equals(READ_GROUP)) &&
|
||||||
(read.getMappingQuality() >= MIN_MAPPING_QUALITY) &&
|
(read.getMappingQuality() >= MIN_MAPPING_QUALITY) &&
|
||||||
|
|
@ -130,7 +133,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
// Convert offset into cycle position which means reversing the position of reads on the negative strand
|
// Convert offset into cycle position which means reversing the position of reads on the negative strand
|
||||||
//int cycle = read.getReadNegativeStrandFlag() ? numBases - offset - 1 : offset;
|
//int cycle = read.getReadNegativeStrandFlag() ? numBases - offset - 1 : offset;
|
||||||
//data[cycle][qual][dinuc_index].inc(base,ref);
|
//data[cycle][qual][dinuc_index].inc(base,ref);
|
||||||
data[offset][qual][dinuc_index].inc(base,ref);
|
data.get(readGroup.getReadGroupId())[offset][qual][dinuc_index].inc(base,ref);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -168,10 +171,12 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
void writeTrainingData() {
|
void writeTrainingData() {
|
||||||
|
|
||||||
|
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
|
||||||
for ( int dinuc_index=0; dinuc_index<NDINUCS; dinuc_index++) {
|
for ( int dinuc_index=0; dinuc_index<NDINUCS; dinuc_index++) {
|
||||||
PrintStream dinuc_out = null;
|
PrintStream dinuc_out = null;
|
||||||
try {
|
try {
|
||||||
dinuc_out = new PrintStream( OUTPUT_FILEROOT+".covariate_counts."+dinucIndex2bases(dinuc_index)+".csv");
|
dinuc_out = new PrintStream( OUTPUT_FILEROOT+".covariate_counts.RG_"+readGroup.getReadGroupId()+"."+dinucIndex2bases(dinuc_index)+".csv");
|
||||||
dinuc_out.println("logitQ,pos,indicator,count");
|
dinuc_out.println("logitQ,pos,indicator,count");
|
||||||
|
|
||||||
for ( RecalData datum: flattenData ) {
|
for ( RecalData datum: flattenData ) {
|
||||||
|
|
@ -190,6 +195,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
class MeanReportedQuality {
|
class MeanReportedQuality {
|
||||||
double Qn = 0;
|
double Qn = 0;
|
||||||
|
|
@ -210,9 +216,10 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void qualityDiffVsCycle() {
|
public void qualityDiffVsCycle() {
|
||||||
|
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
|
||||||
PrintStream ByCycleFile = null;
|
PrintStream ByCycleFile = null;
|
||||||
try {
|
try {
|
||||||
ByCycleFile = new PrintStream(OUTPUT_FILEROOT+".quality_difference_v_cycle.csv");
|
ByCycleFile = new PrintStream(OUTPUT_FILEROOT+".RG_"+readGroup.getReadGroupId()+".quality_difference_v_cycle.csv");
|
||||||
} catch (FileNotFoundException e){
|
} catch (FileNotFoundException e){
|
||||||
System.out.println("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT);
|
System.out.println("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT);
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
|
|
@ -239,14 +246,16 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
double reportedQual = ByCycleReportedQ.get(c).result();
|
double reportedQual = ByCycleReportedQ.get(c).result();
|
||||||
ByCycleFile.printf("%d, %f, %f, %f, %d, %d%n", c, empiricalQual-reportedQual, empiricalQual, reportedQual, ByCycle.get(c).B, ByCycle.get(c).N);
|
ByCycleFile.printf("%d, %f, %f, %f, %d, %d%n", c, empiricalQual-reportedQual, empiricalQual, reportedQual, ByCycle.get(c).B, ByCycle.get(c).N);
|
||||||
}
|
}
|
||||||
System.out.printf("Cycle: N=%d, B=%d, Qemp=%.1f, ", All.N, All.B, -10 * Math.log10((double)All.B/All.N));
|
}
|
||||||
System.out.printf("Qrep=%.1f%n", AllReported.result());
|
//System.out.printf("Cycle: N=%d, B=%d, Qemp=%.1f, ", All.N, All.B, -10 * Math.log10((double)All.B/All.N));
|
||||||
|
//System.out.printf("Qrep=%.1f%n", AllReported.result());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void qualityDiffVsDinucleotide() {
|
public void qualityDiffVsDinucleotide() {
|
||||||
|
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
|
||||||
PrintStream ByDinucFile = null;
|
PrintStream ByDinucFile = null;
|
||||||
try {
|
try {
|
||||||
ByDinucFile = new PrintStream(OUTPUT_FILEROOT+".quality_difference_v_dinucleotide.csv");
|
ByDinucFile = new PrintStream(OUTPUT_FILEROOT+".RG_"+readGroup.getReadGroupId()+".quality_difference_v_dinucleotide.csv");
|
||||||
} catch (FileNotFoundException e){
|
} catch (FileNotFoundException e){
|
||||||
System.out.println("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT);
|
System.out.println("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT);
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
|
|
@ -274,14 +283,16 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
double reportedQual = ByCycleReportedQ.get(c).result();
|
double reportedQual = ByCycleReportedQ.get(c).result();
|
||||||
ByDinucFile.printf("%s, %f, %f, %f, %d, %d%n", ByCycle.get(c).dinuc, empiricalQual-reportedQual, empiricalQual, reportedQual, ByCycle.get(c).B, ByCycle.get(c).N);
|
ByDinucFile.printf("%s, %f, %f, %f, %d, %d%n", ByCycle.get(c).dinuc, empiricalQual-reportedQual, empiricalQual, reportedQual, ByCycle.get(c).B, ByCycle.get(c).N);
|
||||||
}
|
}
|
||||||
System.out.printf("Dinuc: N=%d, B=%d, Qemp=%.1f, ", All.N, All.B, -10 * Math.log10((double)All.B/All.N));
|
}
|
||||||
System.out.printf("Qrep=%.1f%n", AllReported.result());
|
//System.out.printf("Dinuc: N=%d, B=%d, Qemp=%.1f, ", All.N, All.B, -10 * Math.log10((double)All.B/All.N));
|
||||||
|
//System.out.printf("Qrep=%.1f%n", AllReported.result());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void qualityEmpiricalObserved() {
|
public void qualityEmpiricalObserved() {
|
||||||
|
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
|
||||||
PrintStream ByQualFile = null;
|
PrintStream ByQualFile = null;
|
||||||
try {
|
try {
|
||||||
ByQualFile = new PrintStream(OUTPUT_FILEROOT+".empirical_v_reported_quality.csv");
|
ByQualFile = new PrintStream(OUTPUT_FILEROOT+".RG_"+readGroup.getReadGroupId()+".empirical_v_reported_quality.csv");
|
||||||
} catch (FileNotFoundException e){
|
} catch (FileNotFoundException e){
|
||||||
System.out.println("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT);
|
System.out.println("Could not open output files based on OUTPUT_FILEROOT option: " + OUTPUT_FILEROOT);
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
|
|
@ -309,8 +320,9 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
|
||||||
ByQualFile.printf("%d, %f, %.0f, %d, %d%n", q, empiricalQual, ByQReportedQ.get(q).result(), ByQ.get(q).B, ByQ.get(q).N);
|
ByQualFile.printf("%d, %f, %.0f, %d, %d%n", q, empiricalQual, ByQReportedQ.get(q).result(), ByQ.get(q).B, ByQ.get(q).N);
|
||||||
//out.printf("%3d,%s,%3d,%5.1f,%5.1f,%6d,%6d", pos, dinuc, qual, empiricalQual, qual-empiricalQual, N, B); n
|
//out.printf("%3d,%s,%3d,%5.1f,%5.1f,%6d,%6d", pos, dinuc, qual, empiricalQual, qual-empiricalQual, N, B); n
|
||||||
}
|
}
|
||||||
System.out.printf("Emp-Obs: N=%d, B=%d, Qemp=%.1f, ", All.N, All.B, -10 * Math.log10((double)All.B/All.N));
|
}
|
||||||
System.out.printf("Qrep=%.1f%n", AllReported.result());
|
//System.out.printf("Emp-Obs: N=%d, B=%d, Qemp=%.1f, ", All.N, All.B, -10 * Math.log10((double)All.B/All.N));
|
||||||
|
//System.out.printf("Qrep=%.1f%n", AllReported.result());
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer reduceInit() {
|
public Integer reduceInit() {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue