Making SplitReads output FastQ's instead of BAM
- eliminates one step in my pipeline - BAM is too finicky and maintaining parameters that wouldn't be useful was becoming a headache, better avoided.
This commit is contained in:
parent
ae38cf3f72
commit
705cccaf63
|
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.io;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* User: carneiro
|
||||
* Date: 1/27/13
|
||||
* Time: 12:54 AM
|
||||
*/
|
||||
public class FastqFileWriter {
|
||||
private PrintStream output;
|
||||
|
||||
public FastqFileWriter(String filename) {
|
||||
try {
|
||||
this.output = new PrintStream(filename);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ReviewedStingException("Can't open file " + filename);
|
||||
}
|
||||
}
|
||||
|
||||
public void addAlignment(GATKSAMRecord read) {
|
||||
output.println("@" + read.getReadName());
|
||||
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
output.println(ReadUtils.getBasesReverseComplement(read));
|
||||
output.println("+");
|
||||
output.println(ReadUtils.convertReadQualToString(invertQuals(read.getBaseQualities())));
|
||||
} else {
|
||||
output.println(ReadUtils.convertReadBasesToString(read));
|
||||
output.println("+");
|
||||
output.println(ReadUtils.convertReadQualToString(read));
|
||||
}
|
||||
}
|
||||
|
||||
public void close() {
|
||||
this.output.close();
|
||||
}
|
||||
|
||||
private byte[] invertQuals (byte[] quals) {
|
||||
final int l = quals.length;
|
||||
byte[] invertedQuals = new byte[l];
|
||||
for (int i=0; i<l; i++) {
|
||||
invertedQuals[l-1-i] = quals[i];
|
||||
}
|
||||
return invertedQuals;
|
||||
}
|
||||
}
|
||||
|
|
@ -31,10 +31,7 @@ import org.broadinstitute.sting.utils.NGSPlatform;
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author ebanks, depristo
|
||||
|
|
@ -137,6 +134,13 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock);
|
||||
}
|
||||
|
||||
public static GATKSAMRecord createRandomRead(int length) {
|
||||
List<CigarElement> cigarElements = new LinkedList<CigarElement>();
|
||||
cigarElements.add(new CigarElement(length, CigarOperator.M));
|
||||
Cigar cigar = new Cigar(cigarElements);
|
||||
return ArtificialSAMUtils.createArtificialRead(cigar);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// *** The following methods are overloaded to cache the appropriate data ***//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
|||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -850,4 +851,66 @@ public class ReadUtils {
|
|||
return events;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a read, outputs the read bases in a string format
|
||||
*
|
||||
* @param read the read
|
||||
* @return a string representation of the read bases
|
||||
*/
|
||||
public static String convertReadBasesToString(GATKSAMRecord read) {
|
||||
String bases = "";
|
||||
for (byte b : read.getReadBases()) {
|
||||
bases += (char) b;
|
||||
}
|
||||
return bases.toUpperCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a read, outputs the base qualities in a string format
|
||||
*
|
||||
* @param quals the read qualities
|
||||
* @return a string representation of the base qualities
|
||||
*/
|
||||
public static String convertReadQualToString(byte[] quals) {
|
||||
String result = "";
|
||||
for (byte b : quals) {
|
||||
result += (char) (33 + b);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a read, outputs the base qualities in a string format
|
||||
*
|
||||
* @param read the read
|
||||
* @return a string representation of the base qualities
|
||||
*/
|
||||
public static String convertReadQualToString(GATKSAMRecord read) {
|
||||
return convertReadQualToString(read.getBaseQualities());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the reverse complement of the read bases
|
||||
*
|
||||
* @param bases the read bases
|
||||
* @return the reverse complement of the read bases
|
||||
*/
|
||||
public static String getBasesReverseComplement(byte[] bases) {
|
||||
String reverse = "";
|
||||
for (int i = bases.length-1; i >=0; i--) {
|
||||
reverse += (char) BaseUtils.getComplement(bases[i]);
|
||||
}
|
||||
return reverse;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the reverse complement of the read bases
|
||||
*
|
||||
* @param read the read
|
||||
* @return the reverse complement of the read bases
|
||||
*/
|
||||
public static String getBasesReverseComplement(GATKSAMRecord read) {
|
||||
return getBasesReverseComplement(read.getReadBases());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.variant.utils;
|
||||
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -497,4 +498,26 @@ public class BaseUtils {
|
|||
|
||||
return randomBaseIndex;
|
||||
}
|
||||
|
||||
public static byte getComplement(byte base) {
|
||||
switch(base) {
|
||||
case 'a':
|
||||
case 'A':
|
||||
return 'T';
|
||||
case 'c':
|
||||
case 'C':
|
||||
return 'G';
|
||||
case 'g':
|
||||
case 'G':
|
||||
return 'C';
|
||||
case 't':
|
||||
case 'T':
|
||||
return 'A';
|
||||
case 'n':
|
||||
case 'N':
|
||||
return 'N';
|
||||
default:
|
||||
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,12 +26,15 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
|
||||
public class ReadUtilsUnitTest extends BaseTest {
|
||||
|
|
@ -145,4 +148,21 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
boundary = get.getAdaptor(read);
|
||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||
}
|
||||
|
||||
@Test (enabled = true)
|
||||
public void testGetBasesReverseComplement() {
|
||||
int iterations = 1000;
|
||||
Random random = GenomeAnalysisEngine.getRandomGenerator();
|
||||
while(iterations-- > 0) {
|
||||
final int l = random.nextInt(1000);
|
||||
GATKSAMRecord read = GATKSAMRecord.createRandomRead(l);
|
||||
byte [] original = read.getReadBases();
|
||||
byte [] reconverted = new byte[l];
|
||||
String revComp = ReadUtils.getBasesReverseComplement(read);
|
||||
for (int i=0; i<l; i++) {
|
||||
reconverted[l-1-i] = BaseUtils.getComplement((byte) revComp.charAt(i));
|
||||
}
|
||||
Assert.assertEquals(reconverted, original);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue