From 3761c0900b9f095a3bd51d473244707f4397dd0f Mon Sep 17 00:00:00 2001 From: kiran Date: Thu, 14 May 2009 18:01:41 +0000 Subject: [PATCH] Added Bustard vs. Four-prob percent bases consistent output. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@710 348d0f76-0448-11de-a6fe-93d51630548a --- .../secondarybase/AnnotateSecondaryBase.java | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java b/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java index 010455ce7..04aba3f40 100755 --- a/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java +++ b/java/src/org/broadinstitute/sting/secondarybase/AnnotateSecondaryBase.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.secondarybase; import org.broadinstitute.sting.utils.cmdLine.CommandLineProgram; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.secondarybase.BasecallingReadModel; import java.io.File; @@ -38,37 +39,60 @@ public class AnnotateSecondaryBase extends CommandLineProgram { if (SAM_IN == null || !SAM_IN.exists()) { // Iterate through raw Firecrest data and store the first N reads up to TRAINING_LIMIT - System.out.println("Training from the first " + TRAINING_LIMIT + " reads in the raw data."); + System.out.println("Loading training set from the first " + TRAINING_LIMIT + " reads in the raw data..."); trainingSet.loadFirstNUnambiguousReadsTrainingSet(); } else { // Find alignments with zero mismatches and store them until we've picked up TRAINING_LIMIT alignments - System.out.println("Training from the first " + TRAINING_LIMIT + " perfect reads in the aligned data."); + System.out.println("Loading training set from the first " + TRAINING_LIMIT + " perfect reads in the aligned data..."); trainingSet.loadPreAlignedTrainingSet(SAM_IN, REFERENCE); } // Iterate through the stored training data and add the info to the BasecallingReadModel + System.out.println("Applying training set..."); BasecallingReadModel model = new BasecallingReadModel(CYCLE_END - CYCLE_BEGIN + 1, CONTEXT); model.train(trainingSet); // Call bases and write results + System.out.println("Calling bases and writing SAM file..."); SAMFileHeader sfh = new SAMFileHeader(); SAMFileWriter sfw = new SAMFileWriterFactory().makeSAMOrBAMWriter(sfh, false, SAM_OUT); IlluminaParser iparser = new IlluminaParser(BUSTARD_DIR, LANE, CYCLE_BEGIN, CYCLE_END); + int basesConsistent = 0, basesTotal = 0; + RawRead rr; while ((rr = iparser.next()) != null) { FourProbRead fpr = model.call(rr); SAMRecord sr = constructSAMRecord(rr, fpr, sfh, RUN_BARCODE); sfw.addAlignment(sr); + + for (int cycle = 0; cycle < fpr.size(); cycle++) { + int rawBaseIndex = BaseUtils.simpleBaseToBaseIndex((char) rr.getSequence()[cycle]); + int fpBaseIndex = fpr.get(cycle).indexAtRank(0); + + if (rawBaseIndex >= 0 && fpBaseIndex >= 0) { + basesTotal++; + + if (rawBaseIndex == fpBaseIndex) { + basesConsistent++; + } + } + } + + if (basesTotal % 10000 == 0 && basesTotal > 0) { + System.out.printf("%% bases consistent: %d/%d (%4.4f)\r", basesConsistent, basesTotal, ((double) basesConsistent)/((double) basesTotal)); + } } iparser.close(); sfw.close(); - return 0; + System.out.println("Done."); + + return 0; } private SAMRecord constructSAMRecord(RawRead rr, FourProbRead fpr, SAMFileHeader sfh, String runBarcode) {