From a4b69d0adfcfa128416f8e768d888e7b4fc25660 Mon Sep 17 00:00:00 2001 From: hanna Date: Tue, 5 Jan 2010 14:48:19 +0000 Subject: [PATCH] Misc bug fixes. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2501 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/alignment/bwa/BWAAligner.java | 7 -- .../sting/alignment/bwa/BWTFiles.java | 14 +++- .../sting/alignment/bwa/c/BWACAligner.java | 9 +- .../alignment/bwa/java/BWAJavaAligner.java | 2 +- .../alignment/reference/bwt/SuffixArray.java | 2 +- .../walkers/TestReadFishingWalker.java | 82 +++++++++++++++++-- 6 files changed, 94 insertions(+), 22 deletions(-) diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java index 0fe97b55d..ddbf784f5 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWAAligner.java @@ -30,13 +30,6 @@ public abstract class BWAAligner implements Aligner { this.configuration = configuration; } - /** - * Close the existing aligner, freeing resources it consumed. - */ - public void close() { - bwtFiles.close(); - } - /** * Update the configuration passed to the BWA aligner. * @param configuration New configuration to set. diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java index 735d94e33..ac1f046bd 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java @@ -39,6 +39,11 @@ public class BWTFiles { */ public final File pacFile; + /** + * Reverse of packed reference sequence file. + */ + public final File rpacFile; + /** * Forward BWT file. */ @@ -62,7 +67,7 @@ public class BWTFiles { /** * Where these files autogenerated on the fly? */ - private final boolean autogenerated; + public final boolean autogenerated; /** * Create a new BWA configuration file using the given prefix. @@ -74,6 +79,7 @@ public class BWTFiles { annFile = new File(prefix + ".ann"); ambFile = new File(prefix + ".amb"); pacFile = new File(prefix + ".pac"); + rpacFile = new File(prefix + ".rpac"); forwardBWTFile = new File(prefix + ".bwt"); forwardSAFile = new File(prefix + ".sa"); reverseBWTFile = new File(prefix + ".rbwt"); @@ -88,6 +94,7 @@ public class BWTFiles { * @param pacFile Packed representation of the forward reference sequence. * @param forwardBWTFile BWT representation of the forward reference sequence. * @param forwardSAFile SA representation of the forward reference sequence. + * @param rpacFile Packed representation of the reversed reference sequence. * @param reverseBWTFile BWT representation of the reversed reference sequence. * @param reverseSAFile SA representation of the reversed reference sequence. */ @@ -96,6 +103,7 @@ public class BWTFiles { File pacFile, File forwardBWTFile, File forwardSAFile, + File rpacFile, File reverseBWTFile, File reverseSAFile) { this.annFile = annFile; @@ -103,6 +111,7 @@ public class BWTFiles { this.pacFile = pacFile; this.forwardBWTFile = forwardBWTFile; this.forwardSAFile = forwardSAFile; + this.rpacFile = rpacFile; this.reverseBWTFile = reverseBWTFile; this.reverseSAFile = reverseSAFile; autogenerated = true; @@ -120,6 +129,7 @@ public class BWTFiles { success &= pacFile.delete(); success &= forwardBWTFile.delete(); success &= forwardSAFile.delete(); + success &= rpacFile.delete(); success &= reverseBWTFile.delete(); success &= reverseSAFile.delete(); @@ -185,7 +195,7 @@ public class BWTFiles { rbwtFile.deleteOnExit(); rsaFile.deleteOnExit(); - return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rbwtFile,rsaFile); + return new BWTFiles(annFile,ambFile,pacFile,bwtFile,saFile,rpacFile,rbwtFile,rsaFile); } /** diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java index 69ca2bfcd..40478af7e 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/c/BWACAligner.java @@ -48,7 +48,9 @@ public class BWACAligner extends BWAAligner { * @param configuration Configuration for the given aligner. */ public BWACAligner(byte[] referenceSequence, BWAConfiguration configuration) { - this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration); + this(BWTFiles.createFromReferenceSequence(referenceSequence),configuration); + // Now that the temporary files are created, the temporary files can be destroyed. + bwtFiles.close(); } /** @@ -70,7 +72,6 @@ public class BWACAligner extends BWAAligner { if(thunkPointer == 0) throw new StingException("BWA/C close attempted, but BWA/C is not properly initialized."); destroy(thunkPointer); - super.close(); } /** @@ -93,6 +94,8 @@ public class BWACAligner extends BWAAligner { */ @Override public SAMRecord align(final SAMRecord read, final SAMFileHeader newHeader) { + if(bwtFiles.autogenerated) + throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); return Alignment.convertToRead(getBestAlignment(read.getReadBases()),read,newHeader); } @@ -148,6 +151,8 @@ public class BWACAligner extends BWAAligner { */ @Override public Iterable alignAll(final SAMRecord read, final SAMFileHeader newHeader) { + if(bwtFiles.autogenerated) + throw new UnsupportedOperationException("Cannot create target alignment; source contig was generated ad-hoc and is not reliable"); final Iterable alignments = getAllAlignments(read.getReadBases()); return new Iterable() { public Iterator iterator() { diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java index 15aa7f49f..95544fb9e 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java @@ -87,7 +87,7 @@ public class BWAJavaAligner extends BWAAligner { */ @Override public void close() { - super.close(); + throw new UnsupportedOperationException("BWA aligner can't currently be closed."); } /** diff --git a/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java b/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java index c4968cff5..69ba44bcb 100644 --- a/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java +++ b/java/src/org/broadinstitute/sting/alignment/reference/bwt/SuffixArray.java @@ -113,7 +113,7 @@ public class SuffixArray { // Find the first element in the inverse suffix array. long inverseSA0 = -1; - for(i = 0; i < sequence.length; i++) { + for(i = 0; i < suffixArray.length; i++) { if(suffixArray[i] == 0) inverseSA0 = i; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java index 14bc47d0d..dfd35e262 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java @@ -5,13 +5,23 @@ import org.broadinstitute.sting.alignment.bwa.BWAAligner; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.Alignment; -import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; +import net.sf.samtools.util.StringUtil; import net.sf.picard.reference.ReferenceSequence; +import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.util.Scanner; +import java.util.SortedSet; +import java.util.TreeMap; +import java.util.SortedMap; /** * A walker to experiment with fishing for reads in the GATK. Has very limited utility in its current state. @@ -25,20 +35,74 @@ public class TestReadFishingWalker extends ReadWalker { */ private BWAAligner aligner; - @Override - public void initialize() { - ReferenceSequence initialBases; + @Argument(fullName="indel_calls",shortName="ic",doc="Indel calls to use to derive custom references",required=true) + private File indelCalls; + @Argument(fullName="buffer_width",shortName="bw",doc="How much reference to extract around the given event",required=false) + private int bufferWidth = 36; + + private SortedMap aligners = new TreeMap(); + + @Override + public void initialize() { + long startTime = System.currentTimeMillis(); + int numAlignersCreated = 0; + + IndexedFastaSequenceFile referenceReader; + FileInputStream indelCallInputStream; try { - IndexedFastaSequenceFile reader = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile); - SAMSequenceRecord firstSequence = reader.getSequenceDictionary().getSequences().get(0); - initialBases = reader.getSubsequenceAt(firstSequence.getSequenceName(),1,100); + referenceReader = new IndexedFastaSequenceFile(getToolkit().getArguments().referenceFile); + indelCallInputStream = new FileInputStream(indelCalls); } catch(IOException ex) { - throw new StingException("Unable to load initial bases from reference sequence"); + throw new StingException("Unable to load indel calls."); } - aligner = new BWACAligner(initialBases.getBases(),new BWAConfiguration()); + Scanner indelCallReader = new Scanner(indelCallInputStream); + + while(indelCallReader.hasNext()) { + String contig = indelCallReader.next(); + int eventPos = indelCallReader.nextInt(); + int eventLength = indelCallReader.nextInt(); + char type = indelCallReader.next().toUpperCase().charAt(0); + byte[] bases = StringUtil.stringToBytes(indelCallReader.next()); + String sample = indelCallReader.next(); + + byte[] revisedReference; + int start,stop; + + if(type == 'D') { + start = eventPos-eventLength-bufferWidth; + stop = eventPos+eventLength+bufferWidth; + int eventStart = eventPos - start + 1; + int eventStop = eventStart + eventLength - 1; + + ReferenceSequence referenceSequence = referenceReader.getSubsequenceAt(contig,start,stop); + revisedReference = new byte[(stop-start+1) - eventLength]; + + System.arraycopy(referenceSequence.getBases(),0,revisedReference,0,eventStart); + System.arraycopy(referenceSequence.getBases(),eventStop+1,revisedReference,eventStart,stop-start-eventStop); + + } + else if(type == 'I') { + start = eventPos-bufferWidth; + stop = eventPos+bufferWidth; + int eventStart = eventPos - start + 1; + + ReferenceSequence referenceSequence = referenceReader.getSubsequenceAt(contig,start,stop); + revisedReference = new byte[(stop-start+1) + eventLength]; + + System.arraycopy(referenceSequence.getBases(),0,revisedReference,0,bufferWidth+1); + System.arraycopy(bases,0,revisedReference,eventStart,eventLength); + System.arraycopy(referenceSequence.getBases(),eventStart,revisedReference,eventStart+eventLength,bufferWidth); + } + else + throw new StingException("Invalid indel type: " + type); + + aligners.put(GenomeLocParser.createGenomeLoc(contig,start,stop),new BWACAligner(revisedReference,new BWAConfiguration())); + if(++numAlignersCreated % 100 == 0) + out.printf("Created %d aligners in %dms%n",++numAlignersCreated,System.currentTimeMillis()-startTime); + } } @Override