2009-11-21 01:08:09 +08:00
|
|
|
package org.broadinstitute.sting.alignment.bwa;
|
|
|
|
|
|
|
|
|
|
import org.broadinstitute.sting.utils.StingException;
|
2010-01-03 04:19:14 +08:00
|
|
|
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
|
|
|
|
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
|
|
|
|
|
import org.broadinstitute.sting.alignment.reference.bwt.BWTWriter;
|
|
|
|
|
import org.broadinstitute.sting.alignment.reference.bwt.SuffixArray;
|
|
|
|
|
import org.broadinstitute.sting.alignment.reference.bwt.SuffixArrayWriter;
|
|
|
|
|
import org.broadinstitute.sting.alignment.reference.bwt.ANNWriter;
|
|
|
|
|
import org.broadinstitute.sting.alignment.reference.bwt.AMBWriter;
|
|
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
|
import java.io.IOException;
|
|
|
|
|
|
|
|
|
|
import net.sf.samtools.SAMSequenceDictionary;
|
|
|
|
|
import net.sf.samtools.SAMSequenceRecord;
|
2009-11-21 01:08:09 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Support files for BWT.
|
|
|
|
|
*
|
|
|
|
|
* @author mhanna
|
|
|
|
|
* @version 0.1
|
|
|
|
|
*/
|
|
|
|
|
public class BWTFiles {
|
|
|
|
|
/**
|
|
|
|
|
* ANN (?) file name.
|
|
|
|
|
*/
|
2010-01-03 04:19:14 +08:00
|
|
|
public final File annFile;
|
2009-11-21 01:08:09 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* AMB (?) file name.
|
|
|
|
|
*/
|
2010-01-03 04:19:14 +08:00
|
|
|
public final File ambFile;
|
2009-11-21 01:08:09 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Packed reference sequence file.
|
|
|
|
|
*/
|
2010-01-03 04:19:14 +08:00
|
|
|
public final File pacFile;
|
2009-11-21 01:08:09 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Forward BWT file.
|
|
|
|
|
*/
|
2010-01-03 04:19:14 +08:00
|
|
|
public final File forwardBWTFile;
|
2009-11-21 01:08:09 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Forward suffix array file.
|
|
|
|
|
*/
|
2010-01-03 04:19:14 +08:00
|
|
|
public final File forwardSAFile;
|
2009-11-21 01:08:09 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Reverse BWT file.
|
|
|
|
|
*/
|
2010-01-03 04:19:14 +08:00
|
|
|
public final File reverseBWTFile;
|
2009-11-21 01:08:09 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Reverse suffix array file.
|
|
|
|
|
*/
|
2010-01-03 04:19:14 +08:00
|
|
|
public final File reverseSAFile;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Where these files autogenerated on the fly?
|
|
|
|
|
*/
|
|
|
|
|
private final boolean autogenerated;
|
2009-11-21 01:08:09 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Create a new BWA configuration file using the given prefix.
|
|
|
|
|
* @param prefix Prefix to use when creating the configuration. Must not be null.
|
|
|
|
|
*/
|
|
|
|
|
public BWTFiles(String prefix) {
|
|
|
|
|
if(prefix == null)
|
|
|
|
|
throw new StingException("Prefix must not be null.");
|
2010-01-03 04:19:14 +08:00
|
|
|
annFile = new File(prefix + ".ann");
|
|
|
|
|
ambFile = new File(prefix + ".amb");
|
|
|
|
|
pacFile = new File(prefix + ".pac");
|
|
|
|
|
forwardBWTFile = new File(prefix + ".bwt");
|
|
|
|
|
forwardSAFile = new File(prefix + ".sa");
|
|
|
|
|
reverseBWTFile = new File(prefix + ".rbwt");
|
|
|
|
|
reverseSAFile = new File(prefix + ".rsa");
|
|
|
|
|
autogenerated = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Hand-create a new BWTFiles object, specifying a unique file object for each type.
|
|
|
|
|
* @param annFile ANN (alternate dictionary) file.
|
|
|
|
|
* @param ambFile AMB (holes) files.
|
|
|
|
|
* @param pacFile Packed representation of the forward reference sequence.
|
|
|
|
|
* @param forwardBWTFile BWT representation of the forward reference sequence.
|
|
|
|
|
* @param forwardSAFile SA representation of the forward reference sequence.
|
|
|
|
|
* @param reverseBWTFile BWT representation of the reversed reference sequence.
|
|
|
|
|
* @param reverseSAFile SA representation of the reversed reference sequence.
|
|
|
|
|
*/
|
|
|
|
|
private BWTFiles(File annFile,
|
|
|
|
|
File ambFile,
|
|
|
|
|
File pacFile,
|
|
|
|
|
File forwardBWTFile,
|
|
|
|
|
File forwardSAFile,
|
|
|
|
|
File reverseBWTFile,
|
|
|
|
|
File reverseSAFile) {
|
|
|
|
|
this.annFile = annFile;
|
|
|
|
|
this.ambFile = ambFile;
|
|
|
|
|
this.pacFile = pacFile;
|
|
|
|
|
this.forwardBWTFile = forwardBWTFile;
|
|
|
|
|
this.forwardSAFile = forwardSAFile;
|
|
|
|
|
this.reverseBWTFile = reverseBWTFile;
|
|
|
|
|
this.reverseSAFile = reverseSAFile;
|
|
|
|
|
autogenerated = true;
|
2009-11-21 01:08:09 +08:00
|
|
|
}
|
2010-01-03 04:19:14 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Close out this files object, in the process deleting any temporary filse
|
|
|
|
|
* that were created.
|
|
|
|
|
*/
|
|
|
|
|
public void close() {
|
|
|
|
|
if(autogenerated) {
|
|
|
|
|
boolean success = true;
|
|
|
|
|
success = annFile.delete();
|
|
|
|
|
success &= ambFile.delete();
|
|
|
|
|
success &= pacFile.delete();
|
|
|
|
|
success &= forwardBWTFile.delete();
|
|
|
|
|
success &= forwardSAFile.delete();
|
|
|
|
|
success &= reverseBWTFile.delete();
|
|
|
|
|
success &= reverseSAFile.delete();
|
|
|
|
|
|
|
|
|
|
if(!success)
|
|
|
|
|
throw new StingException("Unable to clean up autogenerated representation");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Create a new set of BWT files from the given reference sequence.
|
|
|
|
|
* @param referenceSequence Sequence from which to build metadata.
|
|
|
|
|
* @return A new object representing encoded representations of each sequence.
|
|
|
|
|
*/
|
|
|
|
|
public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
|
|
|
|
|
File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
|
|
|
|
|
try {
|
|
|
|
|
// Write the ann and amb for this reference sequence.
|
|
|
|
|
annFile = File.createTempFile("bwt","ann");
|
|
|
|
|
ambFile = File.createTempFile("bwt","amb");
|
|
|
|
|
|
|
|
|
|
SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
|
|
|
|
|
dictionary.addSequence(new SAMSequenceRecord("autogenerated",referenceSequence.length));
|
|
|
|
|
|
|
|
|
|
ANNWriter annWriter = new ANNWriter(annFile);
|
|
|
|
|
annWriter.write(dictionary);
|
|
|
|
|
annWriter.close();
|
|
|
|
|
|
|
|
|
|
AMBWriter ambWriter = new AMBWriter(ambFile);
|
|
|
|
|
ambWriter.writeEmpty(dictionary);
|
|
|
|
|
ambWriter.close();
|
|
|
|
|
|
|
|
|
|
// Write the encoded files for the forward version of this reference sequence.
|
|
|
|
|
pacFile = File.createTempFile("bwt","pac");
|
|
|
|
|
bwtFile = File.createTempFile("bwt","bwt");
|
|
|
|
|
saFile = File.createTempFile("bwt","sa");
|
|
|
|
|
|
|
|
|
|
writeEncodedReferenceSequence(referenceSequence,pacFile,bwtFile,saFile);
|
|
|
|
|
|
|
|
|
|
// Write the encoded files for the reverse version of this reference sequence.
|
|
|
|
|
byte[] reverseReferenceSequence = new byte[referenceSequence.length];
|
|
|
|
|
System.arraycopy(referenceSequence,0,reverseReferenceSequence,0,referenceSequence.length);
|
|
|
|
|
|
|
|
|
|
rpacFile = File.createTempFile("bwt","rpac");
|
|
|
|
|
rbwtFile = File.createTempFile("bwt","rbwt");
|
|
|
|
|
rsaFile = File.createTempFile("bwt","rsa");
|
|
|
|
|
|
|
|
|
|
writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
|
|
|
|
|
}
|
|
|
|
|
catch(IOException ex) {
|
|
|
|
|
throw new StingException("Unable to write autogenerated reference sequence to temporary files");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Make sure that, at the very least, all temporary files are deleted on exit.
|
|
|
|
|
annFile.deleteOnExit();
|
|
|
|
|
ambFile.deleteOnExit();
|
|
|
|
|
pacFile.deleteOnExit();
|
|
|
|
|
bwtFile.deleteOnExit();
|
|
|
|
|
saFile.deleteOnExit();
|
|
|
|
|
rpacFile.deleteOnExit();
|
|
|
|
|
rbwtFile.deleteOnExit();
|
|
|
|
|
rsaFile.deleteOnExit();
|
|
|
|
|
|
|
|
|
|
return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rbwtFile,rsaFile);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Write the encoded form of the reference sequence. In the case of BWA, the encoded reference
|
|
|
|
|
* sequence is the reference itself in PAC format, the BWT, and the suffix array.
|
|
|
|
|
* @param referenceSequence The reference sequence to encode.
|
|
|
|
|
* @param pacFile Target for the PAC-encoded reference.
|
|
|
|
|
* @param bwtFile Target for the BWT representation of the reference.
|
|
|
|
|
* @param suffixArrayFile Target for the suffix array encoding of the reference.
|
|
|
|
|
* @throws java.io.IOException In case of issues writing to the file.
|
|
|
|
|
*/
|
|
|
|
|
private static void writeEncodedReferenceSequence(byte[] referenceSequence,
|
|
|
|
|
File pacFile,
|
|
|
|
|
File bwtFile,
|
|
|
|
|
File suffixArrayFile) throws IOException {
|
|
|
|
|
PackUtils.writeReferenceSequence(pacFile,referenceSequence);
|
|
|
|
|
|
|
|
|
|
BWT bwt = BWT.createFromReferenceSequence(referenceSequence);
|
|
|
|
|
BWTWriter bwtWriter = new BWTWriter(bwtFile);
|
|
|
|
|
bwtWriter.write(bwt);
|
|
|
|
|
bwtWriter.close();
|
|
|
|
|
|
|
|
|
|
SuffixArray suffixArray = SuffixArray.createFromReferenceSequence(referenceSequence);
|
|
|
|
|
SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile);
|
|
|
|
|
suffixArrayWriter.write(suffixArray);
|
|
|
|
|
suffixArrayWriter.close();
|
|
|
|
|
}
|
2009-11-21 01:08:09 +08:00
|
|
|
}
|