Making SplitReads output FastQ's instead of BAM

- eliminates one step in my pipeline
   - BAM is too finicky and maintaining parameters that wouldn't be useful was becoming a headache, better avoided.
This commit is contained in:
Mauricio Carneiro 2013-01-27 02:36:31 -05:00
parent ae38cf3f72
commit 705cccaf63
5 changed files with 191 additions and 4 deletions

View File

@ -0,0 +1,77 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.io;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.FileNotFoundException;
import java.io.PrintStream;
/**
* User: carneiro
* Date: 1/27/13
* Time: 12:54 AM
*/
public class FastqFileWriter {
private PrintStream output;
public FastqFileWriter(String filename) {
try {
this.output = new PrintStream(filename);
} catch (FileNotFoundException e) {
throw new ReviewedStingException("Can't open file " + filename);
}
}
public void addAlignment(GATKSAMRecord read) {
output.println("@" + read.getReadName());
if (read.getReadNegativeStrandFlag()) {
output.println(ReadUtils.getBasesReverseComplement(read));
output.println("+");
output.println(ReadUtils.convertReadQualToString(invertQuals(read.getBaseQualities())));
} else {
output.println(ReadUtils.convertReadBasesToString(read));
output.println("+");
output.println(ReadUtils.convertReadQualToString(read));
}
}
public void close() {
this.output.close();
}
private byte[] invertQuals (byte[] quals) {
final int l = quals.length;
byte[] invertedQuals = new byte[l];
for (int i=0; i<l; i++) {
invertedQuals[l-1-i] = quals[i];
}
return invertedQuals;
}
}

View File

@ -31,10 +31,7 @@ import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.EventType;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* @author ebanks, depristo
@ -137,6 +134,13 @@ public class GATKSAMRecord extends BAMRecord {
flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock);
}
public static GATKSAMRecord createRandomRead(int length) {
List<CigarElement> cigarElements = new LinkedList<CigarElement>();
cigarElements.add(new CigarElement(length, CigarOperator.M));
Cigar cigar = new Cigar(cigarElements);
return ArtificialSAMUtils.createArtificialRead(cigar);
}
///////////////////////////////////////////////////////////////////////////////
// *** The following methods are overloaded to cache the appropriate data ***//
///////////////////////////////////////////////////////////////////////////////

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.utils.BaseUtils;
import java.io.File;
import java.util.*;
@ -850,4 +851,66 @@ public class ReadUtils {
return events;
}
/**
* Given a read, outputs the read bases in a string format
*
* @param read the read
* @return a string representation of the read bases
*/
public static String convertReadBasesToString(GATKSAMRecord read) {
String bases = "";
for (byte b : read.getReadBases()) {
bases += (char) b;
}
return bases.toUpperCase();
}
/**
* Given a read, outputs the base qualities in a string format
*
* @param quals the read qualities
* @return a string representation of the base qualities
*/
public static String convertReadQualToString(byte[] quals) {
String result = "";
for (byte b : quals) {
result += (char) (33 + b);
}
return result;
}
/**
* Given a read, outputs the base qualities in a string format
*
* @param read the read
* @return a string representation of the base qualities
*/
public static String convertReadQualToString(GATKSAMRecord read) {
return convertReadQualToString(read.getBaseQualities());
}
/**
* Returns the reverse complement of the read bases
*
* @param bases the read bases
* @return the reverse complement of the read bases
*/
public static String getBasesReverseComplement(byte[] bases) {
String reverse = "";
for (int i = bases.length-1; i >=0; i--) {
reverse += (char) BaseUtils.getComplement(bases[i]);
}
return reverse;
}
/**
* Returns the reverse complement of the read bases
*
* @param read the read
* @return the reverse complement of the read bases
*/
public static String getBasesReverseComplement(GATKSAMRecord read) {
return getBasesReverseComplement(read.getReadBases());
}
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.variant.utils;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.Arrays;
@ -497,4 +498,26 @@ public class BaseUtils {
return randomBaseIndex;
}
public static byte getComplement(byte base) {
switch(base) {
case 'a':
case 'A':
return 'T';
case 'c':
case 'C':
return 'G';
case 'g':
case 'G':
return 'C';
case 't':
case 'T':
return 'A';
case 'n':
case 'N':
return 'N';
default:
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
}
}
}

View File

@ -26,12 +26,15 @@
package org.broadinstitute.sting.utils.sam;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.variant.utils.BaseUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;
public class ReadUtilsUnitTest extends BaseTest {
@ -145,4 +148,21 @@ public class ReadUtilsUnitTest extends BaseTest {
boundary = get.getAdaptor(read);
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
}
@Test (enabled = true)
public void testGetBasesReverseComplement() {
int iterations = 1000;
Random random = GenomeAnalysisEngine.getRandomGenerator();
while(iterations-- > 0) {
final int l = random.nextInt(1000);
GATKSAMRecord read = GATKSAMRecord.createRandomRead(l);
byte [] original = read.getReadBases();
byte [] reconverted = new byte[l];
String revComp = ReadUtils.getBasesReverseComplement(read);
for (int i=0; i<l; i++) {
reconverted[l-1-i] = BaseUtils.getComplement((byte) revComp.charAt(i));
}
Assert.assertEquals(reconverted, original);
}
}
}