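We are now seeing results from the training data. There are still some critical problems in the quality of the output, but we are at least getting training output. The platform check now compares the read group's PL attribute to "ILLUMINA" with a case-insensitive string comparison instead of `==`, and the training data is written to a single covariate_counts.csv file with read-group and dinucleotide columns rather than to one file per read group and dinucleotide.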

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@891 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-06-03 20:41:07 +00:00
parent 4e41646c88
commit 8672ae6019
1 changed file with 18 additions and 16 deletions

View File

@ -112,7 +112,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
for (int i =0; i < reads.size(); i++ ) { for (int i =0; i < reads.size(); i++ ) {
SAMRecord read = reads.get(i); SAMRecord read = reads.get(i);
SAMReadGroupRecord readGroup = read.getHeader().getReadGroup((String)read.getAttribute("RG")); SAMReadGroupRecord readGroup = read.getHeader().getReadGroup((String)read.getAttribute("RG"));
if ( readGroup.getAttribute("PL") == "ILLUMINA" && if ( "ILLUMINA".equalsIgnoreCase(readGroup.getAttribute("PL").toString()) &&
!read.getReadNegativeStrandFlag() && !read.getReadNegativeStrandFlag() &&
(READ_GROUP.equals("none") || read.getAttribute("RG") != null && read.getAttribute("RG").equals(READ_GROUP)) && (READ_GROUP.equals("none") || read.getAttribute("RG") != null && read.getAttribute("RG").equals(READ_GROUP)) &&
(read.getMappingQuality() >= MIN_MAPPING_QUALITY) && (read.getMappingQuality() >= MIN_MAPPING_QUALITY) &&
@ -171,30 +171,32 @@ public class CovariateCounterWalker extends LocusWalker<Integer, Integer> {
} }
void writeTrainingData() { void writeTrainingData() {
PrintStream dinuc_out = null;
for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) { try {
for ( int dinuc_index=0; dinuc_index<NDINUCS; dinuc_index++) { dinuc_out = new PrintStream( OUTPUT_FILEROOT+".covariate_counts.csv");
PrintStream dinuc_out = null; dinuc_out.println("rg,dn,logitQ,pos,indicator,count");
try { for (SAMReadGroupRecord readGroup : this.getToolkit().getEngine().getSAMHeader().getReadGroups()) {
dinuc_out = new PrintStream( OUTPUT_FILEROOT+".covariate_counts.RG_"+readGroup.getReadGroupId()+"."+dinucIndex2bases(dinuc_index)+".csv"); for ( int dinuc_index=0; dinuc_index<NDINUCS; dinuc_index++) {
dinuc_out.println("logitQ,pos,indicator,count");
for ( RecalData datum: flattenData ) { for ( RecalData datum: flattenData ) {
if (string2dinucIndex(datum.dinuc) == dinuc_index) { if (string2dinucIndex(datum.dinuc) == dinuc_index) {
if ((datum.N - datum.B) > 0) if ((datum.N - datum.B) > 0)
dinuc_out.format("%d,%d,%d,%d\n", datum.qual, datum.pos, 0, datum.N - datum.B); dinuc_out.format("%s,%s,%d,%d,%d,%d%n", readGroup.getReadGroupId(), dinucIndex2bases(dinuc_index), datum.qual, datum.pos, 0, datum.N - datum.B);
if (datum.B > 0) if (datum.B > 0)
dinuc_out.format("%d,%d,%d,%d\n", datum.qual, datum.pos, 1, datum.B); dinuc_out.format("%s,%s,%d,%d,%d,%d%n", readGroup.getReadGroupId(), dinucIndex2bases(dinuc_index), datum.qual, datum.pos, 1, datum.B);
} }
} }
} catch (FileNotFoundException e) {
System.err.println("FileNotFoundException: " + e.getMessage());
} finally {
if (dinuc_out != null)
dinuc_out.close();
} }
} }
} }
catch (FileNotFoundException e) {
System.err.println("FileNotFoundException: " + e.getMessage());
return;
}
finally {
if (dinuc_out != null)
dinuc_out.close();
}
} }
class MeanReportedQuality { class MeanReportedQuality {