Added very primitive read fishing walker with lots of hard coding. Fixed
bugs encountered when testing read fishing in Ecoli. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2496 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7b702b086f
commit
29c129aced
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.alignment.bwa;
|
package org.broadinstitute.sting.alignment.bwa;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||||
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
|
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
|
||||||
import org.broadinstitute.sting.alignment.reference.bwt.BWTWriter;
|
import org.broadinstitute.sting.alignment.reference.bwt.BWTWriter;
|
||||||
|
|
@ -14,6 +15,7 @@ import java.io.IOException;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Support files for BWT.
|
* Support files for BWT.
|
||||||
|
|
@ -132,14 +134,18 @@ public class BWTFiles {
|
||||||
* @return A new object representing encoded representations of each sequence.
|
* @return A new object representing encoded representations of each sequence.
|
||||||
*/
|
*/
|
||||||
public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
|
public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
|
||||||
|
byte[] normalizedReferenceSequence = new byte[referenceSequence.length];
|
||||||
|
System.arraycopy(referenceSequence,0,normalizedReferenceSequence,0,referenceSequence.length);
|
||||||
|
normalizeReferenceSequence(normalizedReferenceSequence);
|
||||||
|
|
||||||
File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
|
File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
|
||||||
try {
|
try {
|
||||||
// Write the ann and amb for this reference sequence.
|
// Write the ann and amb for this reference sequence.
|
||||||
annFile = File.createTempFile("bwt","ann");
|
annFile = File.createTempFile("bwt",".ann");
|
||||||
ambFile = File.createTempFile("bwt","amb");
|
ambFile = File.createTempFile("bwt",".amb");
|
||||||
|
|
||||||
SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
|
SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
|
||||||
dictionary.addSequence(new SAMSequenceRecord("autogenerated",referenceSequence.length));
|
dictionary.addSequence(new SAMSequenceRecord("autogenerated",normalizedReferenceSequence.length));
|
||||||
|
|
||||||
ANNWriter annWriter = new ANNWriter(annFile);
|
ANNWriter annWriter = new ANNWriter(annFile);
|
||||||
annWriter.write(dictionary);
|
annWriter.write(dictionary);
|
||||||
|
|
@ -150,19 +156,18 @@ public class BWTFiles {
|
||||||
ambWriter.close();
|
ambWriter.close();
|
||||||
|
|
||||||
// Write the encoded files for the forward version of this reference sequence.
|
// Write the encoded files for the forward version of this reference sequence.
|
||||||
pacFile = File.createTempFile("bwt","pac");
|
pacFile = File.createTempFile("bwt",".pac");
|
||||||
bwtFile = File.createTempFile("bwt","bwt");
|
bwtFile = File.createTempFile("bwt",".bwt");
|
||||||
saFile = File.createTempFile("bwt","sa");
|
saFile = File.createTempFile("bwt",".sa");
|
||||||
|
|
||||||
writeEncodedReferenceSequence(referenceSequence,pacFile,bwtFile,saFile);
|
writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile);
|
||||||
|
|
||||||
// Write the encoded files for the reverse version of this reference sequence.
|
// Write the encoded files for the reverse version of this reference sequence.
|
||||||
byte[] reverseReferenceSequence = new byte[referenceSequence.length];
|
byte[] reverseReferenceSequence = BaseUtils.reverse(normalizedReferenceSequence);
|
||||||
System.arraycopy(referenceSequence,0,reverseReferenceSequence,0,referenceSequence.length);
|
|
||||||
|
|
||||||
rpacFile = File.createTempFile("bwt","rpac");
|
rpacFile = File.createTempFile("bwt",".rpac");
|
||||||
rbwtFile = File.createTempFile("bwt","rbwt");
|
rbwtFile = File.createTempFile("bwt",".rbwt");
|
||||||
rsaFile = File.createTempFile("bwt","rsa");
|
rsaFile = File.createTempFile("bwt",".rsa");
|
||||||
|
|
||||||
writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
|
writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
|
||||||
}
|
}
|
||||||
|
|
@ -208,4 +213,18 @@ public class BWTFiles {
|
||||||
suffixArrayWriter.write(suffixArray);
|
suffixArrayWriter.write(suffixArray);
|
||||||
suffixArrayWriter.close();
|
suffixArrayWriter.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert the given reference sequence into a form suitable for building into
|
||||||
|
* on-the-fly sequences.
|
||||||
|
* @param referenceSequence The reference sequence to normalize.
|
||||||
|
* @throws StingException if normalized sequence cannot be generated.
|
||||||
|
*/
|
||||||
|
private static void normalizeReferenceSequence(byte[] referenceSequence) {
|
||||||
|
StringUtil.toUpperCase(referenceSequence);
|
||||||
|
for(byte base: referenceSequence) {
|
||||||
|
if(base != 'A' && base != 'C' && base != 'G' && base != 'T')
|
||||||
|
throw new StingException(String.format("Base type %c is not supported when building references on-the-fly",(char)base));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ public class SuffixArrayWriter {
|
||||||
uintPackedOutputStream.write(1);
|
uintPackedOutputStream.write(1);
|
||||||
// Length of the suffix array.
|
// Length of the suffix array.
|
||||||
uintPackedOutputStream.write(suffixArray.length()-1);
|
uintPackedOutputStream.write(suffixArray.length()-1);
|
||||||
uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length);
|
uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length-1);
|
||||||
}
|
}
|
||||||
catch( IOException ex ) {
|
catch( IOException ex ) {
|
||||||
throw new StingException("Unable to read BWT from input stream.", ex);
|
throw new StingException("Unable to read BWT from input stream.", ex);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||||
|
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||||
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A walker to experiment with fishing for reads in the GATK. Has very limited utility in its current state.
|
||||||
|
*
|
||||||
|
* @author mhanna
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
public class TestReadFishingWalker extends ReadWalker<Integer,Long> {
|
||||||
|
/**
|
||||||
|
* An aligner for the small custom reference.
|
||||||
|
*/
|
||||||
|
private BWAAligner aligner;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
ReferenceSequence initialBases;
|
||||||
|
|
||||||
|
try {
|
||||||
|
IndexedFastaSequenceFile reader = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
|
||||||
|
SAMSequenceRecord firstSequence = reader.getSequenceDictionary().getSequences().get(0);
|
||||||
|
initialBases = reader.getSubsequenceAt(firstSequence.getSequenceName(),1,100);
|
||||||
|
}
|
||||||
|
catch(IOException ex) {
|
||||||
|
throw new StingException("Unable to load initial bases from reference sequence");
|
||||||
|
}
|
||||||
|
|
||||||
|
aligner = new BWACAligner(initialBases.getBases(),new BWAConfiguration());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Integer map(char[] ref, SAMRecord read) {
|
||||||
|
Alignment bestAlignment = aligner.getBestAlignment(read.getReadBases());
|
||||||
|
System.out.println("bestAlignment = " + bestAlignment);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Provide an initial value for reduce computations.
|
||||||
|
* @return Initial value of reduce.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Long reduceInit() {
|
||||||
|
return 0L;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||||
|
* @param value result of the map.
|
||||||
|
* @param accum accumulator for the reduce.
|
||||||
|
* @return accumulator with result of the map taken into account.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Long reduce(Integer value, Long accum) {
|
||||||
|
return value + accum;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onTraversalDone(Long result) {
|
||||||
|
aligner.close();
|
||||||
|
super.onTraversalDone(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue