Making SplitReads output FASTQ files instead of BAM
- eliminates one step in my pipeline - BAM is too finicky and maintaining parameters that wouldn't be useful was becoming a headache, better avoided.
This commit is contained in:
parent
ae38cf3f72
commit
705cccaf63
|
|
@ -0,0 +1,77 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.io;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User: carneiro
|
||||||
|
* Date: 1/27/13
|
||||||
|
* Time: 12:54 AM
|
||||||
|
*/
|
||||||
|
public class FastqFileWriter {
|
||||||
|
private PrintStream output;
|
||||||
|
|
||||||
|
public FastqFileWriter(String filename) {
|
||||||
|
try {
|
||||||
|
this.output = new PrintStream(filename);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new ReviewedStingException("Can't open file " + filename);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addAlignment(GATKSAMRecord read) {
|
||||||
|
output.println("@" + read.getReadName());
|
||||||
|
|
||||||
|
if (read.getReadNegativeStrandFlag()) {
|
||||||
|
output.println(ReadUtils.getBasesReverseComplement(read));
|
||||||
|
output.println("+");
|
||||||
|
output.println(ReadUtils.convertReadQualToString(invertQuals(read.getBaseQualities())));
|
||||||
|
} else {
|
||||||
|
output.println(ReadUtils.convertReadBasesToString(read));
|
||||||
|
output.println("+");
|
||||||
|
output.println(ReadUtils.convertReadQualToString(read));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
this.output.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] invertQuals (byte[] quals) {
|
||||||
|
final int l = quals.length;
|
||||||
|
byte[] invertedQuals = new byte[l];
|
||||||
|
for (int i=0; i<l; i++) {
|
||||||
|
invertedQuals[l-1-i] = quals[i];
|
||||||
|
}
|
||||||
|
return invertedQuals;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -31,10 +31,7 @@ import org.broadinstitute.sting.utils.NGSPlatform;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author ebanks, depristo
|
* @author ebanks, depristo
|
||||||
|
|
@ -137,6 +134,13 @@ public class GATKSAMRecord extends BAMRecord {
|
||||||
flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock);
|
flags, readLen, mateReferenceSequenceIndex, mateAlignmentStart, insertSize, variableLengthBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static GATKSAMRecord createRandomRead(int length) {
|
||||||
|
List<CigarElement> cigarElements = new LinkedList<CigarElement>();
|
||||||
|
cigarElements.add(new CigarElement(length, CigarOperator.M));
|
||||||
|
Cigar cigar = new Cigar(cigarElements);
|
||||||
|
return ArtificialSAMUtils.createArtificialRead(cigar);
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// *** The following methods are overloaded to cache the appropriate data ***//
|
// *** The following methods are overloaded to cache the appropriate data ***//
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.variant.utils.BaseUtils;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -850,4 +851,66 @@ public class ReadUtils {
|
||||||
return events;
|
return events;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a read, outputs the read bases in a string format
|
||||||
|
*
|
||||||
|
* @param read the read
|
||||||
|
* @return a string representation of the read bases
|
||||||
|
*/
|
||||||
|
public static String convertReadBasesToString(GATKSAMRecord read) {
|
||||||
|
String bases = "";
|
||||||
|
for (byte b : read.getReadBases()) {
|
||||||
|
bases += (char) b;
|
||||||
|
}
|
||||||
|
return bases.toUpperCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a read, outputs the base qualities in a string format
|
||||||
|
*
|
||||||
|
* @param quals the read qualities
|
||||||
|
* @return a string representation of the base qualities
|
||||||
|
*/
|
||||||
|
public static String convertReadQualToString(byte[] quals) {
|
||||||
|
String result = "";
|
||||||
|
for (byte b : quals) {
|
||||||
|
result += (char) (33 + b);
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a read, outputs the base qualities in a string format
|
||||||
|
*
|
||||||
|
* @param read the read
|
||||||
|
* @return a string representation of the base qualities
|
||||||
|
*/
|
||||||
|
public static String convertReadQualToString(GATKSAMRecord read) {
|
||||||
|
return convertReadQualToString(read.getBaseQualities());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the reverse complement of the read bases
|
||||||
|
*
|
||||||
|
* @param bases the read bases
|
||||||
|
* @return the reverse complement of the read bases
|
||||||
|
*/
|
||||||
|
public static String getBasesReverseComplement(byte[] bases) {
|
||||||
|
String reverse = "";
|
||||||
|
for (int i = bases.length-1; i >=0; i--) {
|
||||||
|
reverse += (char) BaseUtils.getComplement(bases[i]);
|
||||||
|
}
|
||||||
|
return reverse;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the reverse complement of the read bases
|
||||||
|
*
|
||||||
|
* @param read the read
|
||||||
|
* @return the reverse complement of the read bases
|
||||||
|
*/
|
||||||
|
public static String getBasesReverseComplement(GATKSAMRecord read) {
|
||||||
|
return getBasesReverseComplement(read.getReadBases());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.variant.utils;
|
package org.broadinstitute.variant.utils;
|
||||||
|
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
@ -497,4 +498,26 @@ public class BaseUtils {
|
||||||
|
|
||||||
return randomBaseIndex;
|
return randomBaseIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static byte getComplement(byte base) {
|
||||||
|
switch(base) {
|
||||||
|
case 'a':
|
||||||
|
case 'A':
|
||||||
|
return 'T';
|
||||||
|
case 'c':
|
||||||
|
case 'C':
|
||||||
|
return 'G';
|
||||||
|
case 'g':
|
||||||
|
case 'G':
|
||||||
|
return 'C';
|
||||||
|
case 't':
|
||||||
|
case 'T':
|
||||||
|
return 'A';
|
||||||
|
case 'n':
|
||||||
|
case 'N':
|
||||||
|
return 'N';
|
||||||
|
default:
|
||||||
|
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,12 +26,15 @@
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.variant.utils.BaseUtils;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
|
||||||
public class ReadUtilsUnitTest extends BaseTest {
|
public class ReadUtilsUnitTest extends BaseTest {
|
||||||
|
|
@ -145,4 +148,21 @@ public class ReadUtilsUnitTest extends BaseTest {
|
||||||
boundary = get.getAdaptor(read);
|
boundary = get.getAdaptor(read);
|
||||||
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
Assert.assertEquals(boundary, ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test (enabled = true)
|
||||||
|
public void testGetBasesReverseComplement() {
|
||||||
|
int iterations = 1000;
|
||||||
|
Random random = GenomeAnalysisEngine.getRandomGenerator();
|
||||||
|
while(iterations-- > 0) {
|
||||||
|
final int l = random.nextInt(1000);
|
||||||
|
GATKSAMRecord read = GATKSAMRecord.createRandomRead(l);
|
||||||
|
byte [] original = read.getReadBases();
|
||||||
|
byte [] reconverted = new byte[l];
|
||||||
|
String revComp = ReadUtils.getBasesReverseComplement(read);
|
||||||
|
for (int i=0; i<l; i++) {
|
||||||
|
reconverted[l-1-i] = BaseUtils.getComplement((byte) revComp.charAt(i));
|
||||||
|
}
|
||||||
|
Assert.assertEquals(reconverted, original);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue