From 705cccaf63c57e42fb3fe7450ba998d85ef324f7 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sun, 27 Jan 2013 02:36:31 -0500 Subject: [PATCH] Making SplitReads output FastQ's instead of BAM - eliminates one step in my pipeline - BAM is too finicky and maintaining parameters that wouldn't be useful was becoming a headache, better avoided. --- .../sting/gatk/io/FastqFileWriter.java | 77 +++++++++++++++++++ .../sting/utils/sam/GATKSAMRecord.java | 12 ++- .../sting/utils/sam/ReadUtils.java | 63 +++++++++++++++ .../variant/utils/BaseUtils.java | 23 ++++++ .../sting/utils/sam/ReadUtilsUnitTest.java | 20 +++++ 5 files changed, 191 insertions(+), 4 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/io/FastqFileWriter.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/FastqFileWriter.java b/public/java/src/org/broadinstitute/sting/gatk/io/FastqFileWriter.java new file mode 100644 index 000000000..acef3e500 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/io/FastqFileWriter.java @@ -0,0 +1,77 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.io; + +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.ReadUtils; + +import java.io.FileNotFoundException; +import java.io.PrintStream; + +/** + * User: carneiro + * Date: 1/27/13 + * Time: 12:54 AM + */ +public class FastqFileWriter { + private PrintStream output; + + public FastqFileWriter(String filename) { + try { + this.output = new PrintStream(filename); + } catch (FileNotFoundException e) { + throw new ReviewedStingException("Can't open file " + filename); + } + } + + public void addAlignment(GATKSAMRecord read) { + output.println("@" + read.getReadName()); + + if (read.getReadNegativeStrandFlag()) { + output.println(ReadUtils.getBasesReverseComplement(read)); + output.println("+"); + output.println(ReadUtils.convertReadQualToString(invertQuals(read.getBaseQualities()))); + } else { + output.println(ReadUtils.convertReadBasesToString(read)); + output.println("+"); + output.println(ReadUtils.convertReadQualToString(read)); + } + } + + public void close() { + this.output.close(); + } + + private byte[] invertQuals (byte[] quals) { + final int l = quals.length; + byte[] invertedQuals = new byte[l]; + for (int i=0; i cigarElements = new LinkedList(); + cigarElements.add(new CigarElement(length, CigarOperator.M)); + Cigar cigar = new Cigar(cigarElements); + return ArtificialSAMUtils.createArtificialRead(cigar); + } + /////////////////////////////////////////////////////////////////////////////// // *** The following methods are overloaded to cache the appropriate data ***// /////////////////////////////////////////////////////////////////////////////// diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index c17e81b9c..29f8c8dcd 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.variant.utils.BaseUtils; import java.io.File; import java.util.*; @@ -850,4 +851,66 @@ public class ReadUtils { return events; } + /** + * Given a read, outputs the read bases in a string format + * + * @param read the read + * @return a string representation of the read bases + */ + public static String convertReadBasesToString(GATKSAMRecord read) { + String bases = ""; + for (byte b : read.getReadBases()) { + bases += (char) b; + } + return bases.toUpperCase(); + } + + /** + * Given a read, outputs the base qualities in a string format + * + * @param quals the read qualities + * @return a string representation of the base qualities + */ + public static String convertReadQualToString(byte[] quals) { + String result = ""; + for (byte b : quals) { + result += (char) (33 + b); + } + return result; + } + + /** + * Given a read, outputs the base qualities in a string format + * + * @param read the read + * @return a string representation of the base qualities + */ + public static String convertReadQualToString(GATKSAMRecord read) { + return convertReadQualToString(read.getBaseQualities()); + } + + /** + * Returns the reverse complement of the read bases + * + * @param bases the read bases + * @return the reverse complement of the read bases + */ + public static String getBasesReverseComplement(byte[] bases) { + String reverse = ""; + for (int i = bases.length-1; i >=0; i--) { + reverse += (char) BaseUtils.getComplement(bases[i]); + } + return reverse; + } + + /** + * Returns the reverse complement of the read bases + * + * @param read the read + * @return the reverse complement of the read bases + */ + public static String getBasesReverseComplement(GATKSAMRecord read) { + return getBasesReverseComplement(read.getReadBases()); + } + } diff --git a/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java b/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java index a6ac2ca53..bbd02e0a6 100644 --- a/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/variant/utils/BaseUtils.java @@ -26,6 +26,7 @@ package org.broadinstitute.variant.utils; import net.sf.samtools.util.StringUtil; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.Arrays; @@ -497,4 +498,26 @@ public class BaseUtils { return randomBaseIndex; } + + public static byte getComplement(byte base) { + switch(base) { + case 'a': + case 'A': + return 'T'; + case 'c': + case 'C': + return 'G'; + case 'g': + case 'G': + return 'C'; + case 't': + case 'T': + return 'A'; + case 'n': + case 'N': + return 'N'; + default: + throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base."); + } + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index 7b48b1b69..20971643e 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -26,12 +26,15 @@ package org.broadinstitute.sting.utils.sam; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.variant.utils.BaseUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.LinkedList; import java.util.List; +import java.util.Random; public class ReadUtilsUnitTest extends BaseTest { @@ -145,4 +148,21 @@ public class ReadUtilsUnitTest extends BaseTest { boundary = get.getAdaptor(read); Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY); } + + @Test (enabled = true) + public void testGetBasesReverseComplement() { + int iterations = 1000; + Random random = GenomeAnalysisEngine.getRandomGenerator(); + while(iterations-- > 0) { + final int l = random.nextInt(1000); + GATKSAMRecord read = GATKSAMRecord.createRandomRead(l); + byte [] original = read.getReadBases(); + byte [] reconverted = new byte[l]; + String revComp = ReadUtils.getBasesReverseComplement(read); + for (int i=0; i