From c51f51f255f33f9ee4890d2d6cec120f8e04b4ed Mon Sep 17 00:00:00 2001 From: kiran Date: Mon, 13 Apr 2009 00:49:41 +0000 Subject: [PATCH] Make sure we always write at least 1000 points per base in each cycle's scatterplot. Print the disagreement rate between Bustard and FourBaseRecaller. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@375 348d0f76-0448-11de-a6fe-93d51630548a --- .../fourbasecaller/FourBaseRecaller.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java index 081a0afdd..344fa9d3a 100644 --- a/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java +++ b/java/src/org/broadinstitute/sting/playground/fourbasecaller/FourBaseRecaller.java @@ -11,6 +11,7 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.playground.illumina.FirecrestFileParser; import org.broadinstitute.sting.playground.illumina.FirecrestReadData; import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; import java.io.File; @@ -139,6 +140,9 @@ public class FourBaseRecaller extends CommandLineProgram { ffp = new FirecrestFileParser(DIR.getParentFile(), LANE); fread = ffp.next(); + int[][] base_counts = new int[4][bread.getFirstReadSequence().length()]; + int bases_incorrect = 0, bases_total = 0; + if (debugout != null) { debugout.println("cycle int_a int_c int_g int_t bustard_base kiran_base bustard_prob kiran_prob"); } queryid = 0; @@ -163,10 +167,14 @@ public class FourBaseRecaller extends CommandLineProgram { bestqual[cycle] = fp.qualAtRank(0); nextbestqual[cycle] = QualityUtils.baseAndProbToCompressedQuality(fp.indexAtRank(1), fp.probAtRank(1)); - if (debugout != null && queryid < 5000 && bases.charAt(cycle) != '.') { + if (debugout != null && bases.charAt(cycle) != '.' && base_counts[BaseUtils.simpleBaseToBaseIndex(bases.charAt(cycle))][cycle] < 1000) { debugout.println(cycle + " " + fourintensity[0] + " " + fourintensity[1] + " " + fourintensity[2] + " " + fourintensity[3] + " " + (bases.charAt(cycle)) + " " + ((char) asciiseq[cycle]) + " " + bestqual[cycle] + " " + quals[cycle]); - //System.out.println(cycle + " " + fourintensity[0] + " " + fourintensity[1] + " " + fourintensity[2] + " " + fourintensity[3] + " " + (bases.charAt(cycle)) + " " + ((char) asciiseq[cycle]) + " " + bestqual[cycle] + " " + quals[cycle]); + + base_counts[BaseUtils.simpleBaseToBaseIndex(bases.charAt(cycle))][cycle]++; } + + bases_incorrect += (bases.charAt(cycle) == (char) asciiseq[cycle]) ? 0 : 1; + bases_total++; } sfw.addAlignment(constructSAMRecord("KIR_", new String(asciiseq), bestqual, nextbestqual, bread, sfh)); @@ -180,6 +188,8 @@ public class FourBaseRecaller extends CommandLineProgram { } sfw.close(); + System.out.println("Nonmatch rate: " + ((double) bases_incorrect)/((double) bases_total)); + return 0; }