diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounter.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounter.java index 4d511bce0..157d2a66c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounter.java @@ -109,13 +109,13 @@ public class CovariateCounter { * @param ref * @return */ - public int updateDataFromRead( String rg, SAMRecord read, int offset, char ref ) { + public int updateDataFromRead( String rg, SAMRecord read, int offset, char ref, boolean useOriginalQuals ) { if ( offset == 0 ) throw new RuntimeException("Illegal read offset " + offset + " in read " + read.getReadName()); int cycle = offset; byte[] bases = read.getReadBases(); - byte[] quals = read.getBaseQualities(); + byte[] quals = RecalDataManager.getQualsForRecalibration(read, useOriginalQuals); char base = (char)bases[offset]; char prevBase = (char)bases[offset - 1]; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 7c877c6c2..9878dbfc0 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -41,6 +41,10 @@ public class CovariateCounterWalker extends LocusWalker { //@Argument(fullName="collapseDinuc", shortName="collapseDinuc", required=false, doc="") public boolean collapseDinuc = false; + @Argument(fullName = "useOriginalQuals", shortName="OQ", doc="If provided, we will use use the quals from the original qualities OQ attribute field instead of the quals in the regular QUALS field", required=false) + public boolean useOriginalQuals = false; + + private CovariateCounter covariateCounter = null; private long counted_sites = 0; // number of sites used to count covariates @@ -109,7 +113,7 @@ public class CovariateCounterWalker extends LocusWalker { if ((read.getMappingQuality() >= MIN_MAPPING_QUALITY && isSupportedReadGroup(readGroup) )) { int offset = offsets.get(i); if ( offset > 0 && offset < (read.getReadLength() - 1) ) { // skip first and last bases because they suck and they don't have a dinuc count - counted_bases += covariateCounter.updateDataFromRead(readGroupString, read, offset, ref.getBase()); + counted_bases += covariateCounter.updateDataFromRead(readGroupString, read, offset, ref.getBase(), useOriginalQuals); } } } @@ -235,33 +239,6 @@ public class CovariateCounterWalker extends LocusWalker { out.printf("# fraction_skipped 1 / %.0f bp%n", (double)counted_sites / skipped_sites); } - @Deprecated - private void writeLogisticRecalibrationTable() { - PrintStream dinuc_out = null; - try { - dinuc_out = new PrintStream( OUTPUT_FILEROOT+".covariate_counts.csv"); - dinuc_out.println("rg,dn,logitQ,pos,indicator,count"); - for (String readGroup : covariateCounter.getReadGroups()) { - for ( int dinuc_index=0; dinuc_index 0) - dinuc_out.format("%s,%s,%d,%d,%d,%d%n", readGroup, RecalData.dinucIndex2bases(dinuc_index), datum.qual, datum.pos, 0, datum.N - datum.B); - if (datum.B > 0) - dinuc_out.format("%s,%s,%d,%d,%d,%d%n", readGroup, RecalData.dinucIndex2bases(dinuc_index), datum.qual, datum.pos, 1, datum.B); - } - } - } - } - } - catch (FileNotFoundException e) { - System.err.println("FileNotFoundException: " + e.getMessage()); - } - finally { - if (dinuc_out != null) dinuc_out.close(); - } - } - /** * Writes out the key recalibration data collected from the reads. Dumps this recalibration data * as a CVS string to the recalTableOut PrintStream. Emits the data for all read groups into this file. diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 4950a91eb..c976ef4c8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -1,9 +1,12 @@ package org.broadinstitute.sting.gatk.walkers.recalibration; import org.broadinstitute.sting.utils.ExpandingArrayList; +import org.broadinstitute.sting.utils.QualityUtils; import java.util.*; +import net.sf.samtools.SAMRecord; + /** * Created by IntelliJ IDEA. * User: mdepristo @@ -12,6 +15,9 @@ import java.util.*; * To change this template use File | Settings | File Templates. */ public class RecalDataManager { + /** The original quality scores are stored in this attribute */ + public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; + ArrayList flattenData = new ArrayList(); boolean trackPos, trackDinuc; String readGroup; @@ -114,10 +120,6 @@ public class RecalDataManager { return d3 == null ? null : d3.get(dinucIndex); } - //private RecalData get(int posIndex, int qual, int dinucIndex) { - // return data[posIndex][qual][dinucIndex]; - //} - private void set(int posIndex, int qual, int dinucIndex, RecalData datum) { // grow data if necessary ExpandingArrayList> d2 = data.get(posIndex); @@ -137,31 +139,6 @@ public class RecalDataManager { d3.set(dinucIndex, datum); } - //private void set(int posIndex, int qual, int dinucIndex, RecalData datum) { - // data[posIndex][qual][dinucIndex] = datum; - //} - - - /* public List select(int pos, int qual, int dinuc_index ) { - List l = new LinkedList(); - for ( int i = 0; i < data.length; i++ ) { - if ( i == pos || pos == -1 || ! trackPos ) { - for ( int j = 0; j < data[i].length; j++ ) { - if ( j == qual || qual == -1 ) { - for ( int k = 0; k < data[i][j].length; k++ ) { - if ( k == dinuc_index|| dinuc_index == -1 || ! trackDinuc ) { - l.add(data[i][j][k]); - } - } - } - } - } - } - - return l; - } - */ - private RecalData getMatchingDatum(List l, RecalData datum, boolean combinePos, boolean combineQual, boolean combineDinuc) { for ( RecalData find : l ) { @@ -199,5 +176,20 @@ public class RecalDataManager { public List getAll() { return flattenData; } + + // we can process the OQ field if requested + public static byte[] getQualsForRecalibration( SAMRecord read, boolean useOriginalQuals ) { + byte[] quals = read.getBaseQualities(); + if ( useOriginalQuals && read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG) != null ) { + Object obj = read.getAttribute(ORIGINAL_QUAL_ATTRIBUTE_TAG); + if ( obj instanceof String ) + quals = QualityUtils.fastqToPhred((String)obj); + else { + throw new RuntimeException(String.format("Value encoded by %s in %s isn't a string!", ORIGINAL_QUAL_ATTRIBUTE_TAG, read.getReadName())); + } + } + + return quals; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 08a09f4d1..53972aa0a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -39,6 +39,7 @@ import java.util.regex.Pattern; import java.util.regex.Matcher; import java.io.File; import java.io.FileNotFoundException; +import java.lang.reflect.Method; @WalkerName("TableRecalibration") @Requires({DataSource.READS, DataSource.REFERENCE}) @@ -52,14 +53,17 @@ public class TableRecalibrationWalker extends ReadWalker [prevBase x base -> [cycle, qual, new qual]] HashMap cache = new HashMap(); @@ -82,10 +86,9 @@ public class TableRecalibrationWalker extends ReadWalker %d%n", originalQuals[i], recalQuals[i]); - if ( originalQuals[i] == 0 ) { - //System.out.printf("Preserving Q0 base at %d in read %s%n", i, read.getReadName()); - recalQuals[i] = 0; - } + private void preserveQScores( byte[] originalQuals, byte[] recalQuals, SAMRecord read ) { + for ( int i = 0; i < recalQuals.length; i++ ) { + if ( originalQuals[i] < preserveQScoresLessThan ) { + System.out.printf("Preserving Q%d base at %d in read %s%n", originalQuals[i], i, read.getReadName()); + recalQuals[i] = originalQuals[i]; } } } @@ -528,12 +530,6 @@ class SerialRecalMapping implements RecalMapping { readGroup, prevBase, base, cycle, qual, qual, globalDeltaQ, deltaQPos, deltaQDinuc, newQualByte)); } - //if ( printStateP(pos, RecalData.dinucIndex2bases(index), qual) ) - // System.out.printf("%s %c%c %d %d => %d + %.2f + %.2f + %.2f + %.2f = %d%n", - // readGroup, prevBase, base, cycle, qual, - // qual, globalDeltaQ, deltaQual, deltaQPos, deltaQDinuc, - // newQualByte); - return newQualByte; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/java/src/org/broadinstitute/sting/utils/QualityUtils.java index fdf7fbbfd..6fae2a380 100755 --- a/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -206,4 +206,73 @@ public class QualityUtils { return rcCompressedQuals; } + + // TODO -- + // TODO -- + // TODO -- stolen from SAMUtils in picard -- remove when public access is granted + // TODO -- + // TODO -- + + /** + * Convert an array of bytes, in which each byte is a binary phred quality score, to + * printable ASCII representation of the quality scores, ala FASTQ format. + * @param buffer Array of bytes in which each byte is a binar phred score. + * @param offset Where in buffer to start conversion. + * @param length How many bytes of buffer to convert. + * @return String with ASCII representation of those quality scores. + */ + public static String phredToFastq(final byte[] buffer, final int offset, final int length) { + final char[] chars = new char[length]; + for (int i = 0; i < length; i++) { + chars[i] = phredToFastq(buffer[offset+i] & 0xFF); + } + return new String(chars); + } + + /** convenience -- should be pushed back into Picard */ + public static String phredToFastq(final byte[] buffer) { + return phredToFastq(buffer, 0, buffer.length); + } + + /** + * Convert a single binary phred score to printable ASCII representation, ala FASTQ format. + * @param phredScore binary phred score. + * @return Printable ASCII representation of phred score. + */ + public static char phredToFastq(final int phredScore) { + if (phredScore < 0 || phredScore > 63) { + throw new IllegalArgumentException("Cannot encode phred score: " + phredScore); + } + return (char) (33 + phredScore); + } + + /** + * Convert a string with phred scores in printable ASCII FASTQ format to an array + * of binary phred scores. + * @param fastq Phred scores in FASTQ printable ASCII format. + * @return byte array of binary phred scores in which each byte corresponds to a character in the input string. + */ + public static byte[] fastqToPhred(final String fastq) { + if (fastq == null) { + return null; + } + final int length = fastq.length(); + final byte[] scores = new byte[length]; + for (int i = 0; i < length; i++) { + scores[i] = (byte) fastqToPhred(fastq.charAt(i)); + } + return scores; + } + + /** + * Convert a single printable ASCII FASTQ format phred score to binary phred score. + * @param ch Printable ASCII FASTQ format phred score. + * @return Binary phred score. + */ + public static int fastqToPhred(final char ch) { + if (ch < 33 || ch > 126) { + throw new IllegalArgumentException("Invalid fastq character: " + ch); + } + return (ch - 33); + } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterTest.java index d02bf8f45..c721de1e8 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterTest.java @@ -93,7 +93,7 @@ public class CovariateCounterTest extends BaseTest { @Test public void testOneRead() { for ( int i = 1; i < read1.getReadBases().length; i++ ) - c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i]); + c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i], false); c.printState(); Assert.assertEquals("Incorrect mapping to recal bin", c.getRecalData(readGroup1, 0, quals1[0], 'A', (char)bases1[0]).N, 0); @@ -112,9 +112,9 @@ public class CovariateCounterTest extends BaseTest { @Test public void testTwoReads() { for ( int i = 1; i < read1.getReadBases().length; i++ ) - c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i]); + c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i], false); for ( int i = 1; i < read2.getReadBases().length; i++ ) - c.updateDataFromRead(readGroup2, read2, i, (char)read2.getReadBases()[i]); + c.updateDataFromRead(readGroup2, read2, i, (char)read2.getReadBases()[i], false); c.printState(); Assert.assertEquals("Incorrect mapping to recal bin", c.getRecalData(readGroup1, 0, quals1[0], 'A', (char)bases1[0]).N, 0); @@ -133,9 +133,9 @@ public class CovariateCounterTest extends BaseTest { @Test public void testTwoReadsSameGroup() { for ( int i = 1; i < read1.getReadBases().length; i++ ) - c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i]); + c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i], false); for ( int i = 1; i < read2.getReadBases().length; i++ ) - c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i]); + c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i], false); c.printState(); for ( int i = 1; i < bases1.length; i++ ) { @@ -153,9 +153,9 @@ public class CovariateCounterTest extends BaseTest { @Test public void testTwoReadsSameGroupNotIdentical() { for ( int i = 1; i < read1.getReadBases().length; i++ ) - c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i]); + c.updateDataFromRead(readGroup1, read1, i, (char)read1.getReadBases()[i], false); for ( int i = 1; i < read3.getReadBases().length; i++ ) - c.updateDataFromRead(readGroup1, read3, i, (char)read3.getReadBases()[i]); + c.updateDataFromRead(readGroup1, read3, i, (char)read3.getReadBases()[i], false); c.printState(); for ( int i = 1; i < bases1.length; i++ ) { @@ -177,6 +177,6 @@ public class CovariateCounterTest extends BaseTest { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header,"read1",1,1, bases, quals); - c.updateDataFromRead(readGroup1, read, 0, (char)bases[0]); + c.updateDataFromRead(readGroup1, read, 0, (char)bases[0], false); } } \ No newline at end of file