Added very primitive read fishing walker with lots of hard coding. Fixed
bugs encountered when testing read fishing in Ecoli. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2496 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7b702b086f
commit
29c129aced
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.alignment.bwa;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
|
||||
import org.broadinstitute.sting.alignment.reference.bwt.BWTWriter;
|
||||
|
|
@ -14,6 +15,7 @@ import java.io.IOException;
|
|||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
/**
|
||||
* Support files for BWT.
|
||||
|
|
@ -132,14 +134,18 @@ public class BWTFiles {
|
|||
* @return A new object representing encoded representations of each sequence.
|
||||
*/
|
||||
public static BWTFiles createFromReferenceSequence(byte[] referenceSequence) {
|
||||
byte[] normalizedReferenceSequence = new byte[referenceSequence.length];
|
||||
System.arraycopy(referenceSequence,0,normalizedReferenceSequence,0,referenceSequence.length);
|
||||
normalizeReferenceSequence(normalizedReferenceSequence);
|
||||
|
||||
File annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile;
|
||||
try {
|
||||
// Write the ann and amb for this reference sequence.
|
||||
annFile = File.createTempFile("bwt","ann");
|
||||
ambFile = File.createTempFile("bwt","amb");
|
||||
annFile = File.createTempFile("bwt",".ann");
|
||||
ambFile = File.createTempFile("bwt",".amb");
|
||||
|
||||
SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
|
||||
dictionary.addSequence(new SAMSequenceRecord("autogenerated",referenceSequence.length));
|
||||
dictionary.addSequence(new SAMSequenceRecord("autogenerated",normalizedReferenceSequence.length));
|
||||
|
||||
ANNWriter annWriter = new ANNWriter(annFile);
|
||||
annWriter.write(dictionary);
|
||||
|
|
@ -150,19 +156,18 @@ public class BWTFiles {
|
|||
ambWriter.close();
|
||||
|
||||
// Write the encoded files for the forward version of this reference sequence.
|
||||
pacFile = File.createTempFile("bwt","pac");
|
||||
bwtFile = File.createTempFile("bwt","bwt");
|
||||
saFile = File.createTempFile("bwt","sa");
|
||||
pacFile = File.createTempFile("bwt",".pac");
|
||||
bwtFile = File.createTempFile("bwt",".bwt");
|
||||
saFile = File.createTempFile("bwt",".sa");
|
||||
|
||||
writeEncodedReferenceSequence(referenceSequence,pacFile,bwtFile,saFile);
|
||||
writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile);
|
||||
|
||||
// Write the encoded files for the reverse version of this reference sequence.
|
||||
byte[] reverseReferenceSequence = new byte[referenceSequence.length];
|
||||
System.arraycopy(referenceSequence,0,reverseReferenceSequence,0,referenceSequence.length);
|
||||
byte[] reverseReferenceSequence = BaseUtils.reverse(normalizedReferenceSequence);
|
||||
|
||||
rpacFile = File.createTempFile("bwt","rpac");
|
||||
rbwtFile = File.createTempFile("bwt","rbwt");
|
||||
rsaFile = File.createTempFile("bwt","rsa");
|
||||
rpacFile = File.createTempFile("bwt",".rpac");
|
||||
rbwtFile = File.createTempFile("bwt",".rbwt");
|
||||
rsaFile = File.createTempFile("bwt",".rsa");
|
||||
|
||||
writeEncodedReferenceSequence(reverseReferenceSequence,rpacFile,rbwtFile,rsaFile);
|
||||
}
|
||||
|
|
@ -207,5 +212,19 @@ public class BWTFiles {
|
|||
SuffixArrayWriter suffixArrayWriter = new SuffixArrayWriter(suffixArrayFile);
|
||||
suffixArrayWriter.write(suffixArray);
|
||||
suffixArrayWriter.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert the given reference sequence into a form suitable for building into
|
||||
* on-the-fly sequences.
|
||||
* @param referenceSequence The reference sequence to normalize.
|
||||
* @throws StingException if normalized sequence cannot be generated.
|
||||
*/
|
||||
private static void normalizeReferenceSequence(byte[] referenceSequence) {
|
||||
StringUtil.toUpperCase(referenceSequence);
|
||||
for(byte base: referenceSequence) {
|
||||
if(base != 'A' && base != 'C' && base != 'G' && base != 'T')
|
||||
throw new StingException(String.format("Base type %c is not supported when building references on-the-fly",(char)base));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class SuffixArrayWriter {
|
|||
uintPackedOutputStream.write(1);
|
||||
// Length of the suffix array.
|
||||
uintPackedOutputStream.write(suffixArray.length()-1);
|
||||
uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length);
|
||||
uintPackedOutputStream.write(suffixArray.sequence,1,suffixArray.sequence.length-1);
|
||||
}
|
||||
catch( IOException ex ) {
|
||||
throw new StingException("Unable to read BWT from input stream.", ex);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,78 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* A walker to experiment with fishing for reads in the GATK. Has very limited utility in its current state.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
public class TestReadFishingWalker extends ReadWalker<Integer,Long> {
|
||||
/**
|
||||
* An aligner for the small custom reference.
|
||||
*/
|
||||
private BWAAligner aligner;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
ReferenceSequence initialBases;
|
||||
|
||||
try {
|
||||
IndexedFastaSequenceFile reader = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile);
|
||||
SAMSequenceRecord firstSequence = reader.getSequenceDictionary().getSequences().get(0);
|
||||
initialBases = reader.getSubsequenceAt(firstSequence.getSequenceName(),1,100);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new StingException("Unable to load initial bases from reference sequence");
|
||||
}
|
||||
|
||||
aligner = new BWACAligner(initialBases.getBases(),new BWAConfiguration());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
Alignment bestAlignment = aligner.getBestAlignment(read.getReadBases());
|
||||
System.out.println("bestAlignment = " + bestAlignment);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
@Override
|
||||
public Long reduceInit() {
|
||||
return 0L;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
* @param value result of the map.
|
||||
* @param accum accumulator for the reduce.
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
@Override
|
||||
public Long reduce(Integer value, Long accum) {
|
||||
return value + accum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void onTraversalDone(Long result) {
|
||||
aligner.close();
|
||||
super.onTraversalDone(result);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue