Support negative strand alignments.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1748 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d3b1732cca
commit
665951f9f0
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.alignment.bwa.bwt.*;
|
||||||
import org.broadinstitute.sting.alignment.Aligner;
|
import org.broadinstitute.sting.alignment.Aligner;
|
||||||
import org.broadinstitute.sting.alignment.Alignment;
|
import org.broadinstitute.sting.alignment.Alignment;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -21,24 +22,23 @@ import net.sf.samtools.SAMFileReader;
|
||||||
*/
|
*/
|
||||||
public class AlignerTestHarness {
|
public class AlignerTestHarness {
|
||||||
public static void main( String argv[] ) throws FileNotFoundException {
|
public static void main( String argv[] ) throws FileNotFoundException {
|
||||||
if( argv.length != 5 ) {
|
if( argv.length != 6 ) {
|
||||||
System.out.println("PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <bam>");
|
System.out.println("PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <rsa> <bam>");
|
||||||
System.exit(1);
|
System.exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
File referenceFile = new File(argv[0]);
|
File referenceFile = new File(argv[0]);
|
||||||
File bwtFile = new File(argv[1]);
|
File bwtFile = new File(argv[1]);
|
||||||
File rbwtFile = new File(argv[2]);
|
File rbwtFile = new File(argv[2]);
|
||||||
File reverseSuffixArrayFile = new File(argv[3]);
|
File suffixArrayFile = new File(argv[3]);
|
||||||
File bamFile = new File(argv[4]);
|
File reverseSuffixArrayFile = new File(argv[4]);
|
||||||
|
File bamFile = new File(argv[5]);
|
||||||
|
|
||||||
align(referenceFile,bwtFile,rbwtFile,reverseSuffixArrayFile,bamFile);
|
align(referenceFile,bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile,bamFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void align(File referenceFile, File bwtFile, File rbwtFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException {
|
private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException {
|
||||||
BWT bwt = new BWTReader(bwtFile).read();
|
Aligner aligner = new BWAAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile);
|
||||||
|
|
||||||
Aligner aligner = new BWAAligner(bwtFile,rbwtFile,reverseSuffixArrayFile);
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
||||||
SAMFileReader reader = new SAMFileReader(bamFile);
|
SAMFileReader reader = new SAMFileReader(bamFile);
|
||||||
|
|
@ -46,12 +46,31 @@ public class AlignerTestHarness {
|
||||||
|
|
||||||
for(SAMRecord read: reader) {
|
for(SAMRecord read: reader) {
|
||||||
count++;
|
count++;
|
||||||
//if( count > 39 ) break;
|
//if( count > 25000 ) break;
|
||||||
//if( count != 39 ) continue;
|
//if( count != 39 ) continue;
|
||||||
//if( !read.getReadName().endsWith("1507:1636#0") )
|
//if( !read.getReadName().endsWith("1507:1636#0") )
|
||||||
// continue;
|
// continue;
|
||||||
|
|
||||||
List<Alignment> alignments = aligner.align(read);
|
SAMRecord alignmentCleaned = null;
|
||||||
|
try {
|
||||||
|
alignmentCleaned = (SAMRecord)read.clone();
|
||||||
|
}
|
||||||
|
catch( CloneNotSupportedException ex ) {
|
||||||
|
throw new StingException("SAMRecord clone not supported", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( alignmentCleaned.getReadNegativeStrandFlag() )
|
||||||
|
alignmentCleaned.setReadBases(BaseUtils.simpleReverseComplement(alignmentCleaned.getReadBases()));
|
||||||
|
|
||||||
|
alignmentCleaned.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
|
||||||
|
alignmentCleaned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
||||||
|
alignmentCleaned.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
|
||||||
|
alignmentCleaned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
|
||||||
|
|
||||||
|
// Clear everything except flags pertaining to pairing and set 'unmapped' status to true.
|
||||||
|
alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C);
|
||||||
|
|
||||||
|
List<Alignment> alignments = aligner.align(alignmentCleaned);
|
||||||
if(alignments.size() == 0 )
|
if(alignments.size() == 0 )
|
||||||
throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
|
throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
|
||||||
|
|
||||||
|
|
@ -59,6 +78,9 @@ public class AlignerTestHarness {
|
||||||
|
|
||||||
System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions());
|
System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), alignment.getAlignmentStart(), alignment.getMismatches(), alignment.getGapOpens(), alignment.getGapExtensions());
|
||||||
|
|
||||||
|
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() )
|
||||||
|
throw new StingException("Read has been aligned in wrong direction");
|
||||||
|
|
||||||
if( read.getAlignmentStart() != alignment.getAlignmentStart() ) {
|
if( read.getAlignmentStart() != alignment.getAlignmentStart() ) {
|
||||||
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||||
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases());
|
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases());
|
||||||
|
|
@ -71,7 +93,7 @@ public class AlignerTestHarness {
|
||||||
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignments.get(0).getAlignmentStart(),alignments.get(0).getAlignmentStart()+read.getReadLength()-1).getBases());
|
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignments.get(0).getAlignmentStart(),alignments.get(0).getAlignmentStart()+read.getReadLength()-1).getBases());
|
||||||
int actualMismatches = 0;
|
int actualMismatches = 0;
|
||||||
for( int i = 0; i < read.getReadLength(); i++ ) {
|
for( int i = 0; i < read.getReadLength(); i++ ) {
|
||||||
if( read.getReadBases()[i] != expectedRef.charAt(i) )
|
if( read.getReadBases()[i] != alignedRef.charAt(i) )
|
||||||
actualMismatches++;
|
actualMismatches++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,11 @@ public class BWAAligner implements Aligner {
|
||||||
/**
|
/**
|
||||||
* Suffix array in the forward direction.
|
* Suffix array in the forward direction.
|
||||||
*/
|
*/
|
||||||
|
private SuffixArray forwardSuffixArray;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffix array in the reverse direction.
|
||||||
|
*/
|
||||||
private SuffixArray reverseSuffixArray;
|
private SuffixArray reverseSuffixArray;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -62,41 +67,47 @@ public class BWAAligner implements Aligner {
|
||||||
*/
|
*/
|
||||||
public final int GAP_EXTENSION_PENALTY = 4;
|
public final int GAP_EXTENSION_PENALTY = 4;
|
||||||
|
|
||||||
public BWAAligner( File forwardBWTFile, File reverseBWTFile, File reverseSuffixArrayFile ) {
|
public BWAAligner( File forwardBWTFile, File reverseBWTFile, File forwardSuffixArrayFile, File reverseSuffixArrayFile ) {
|
||||||
forwardBWT = new BWTReader(forwardBWTFile).read();
|
forwardBWT = new BWTReader(forwardBWTFile).read();
|
||||||
reverseBWT = new BWTReader(reverseBWTFile).read();
|
reverseBWT = new BWTReader(reverseBWTFile).read();
|
||||||
|
forwardSuffixArray = new SuffixArrayReader(forwardSuffixArrayFile).read();
|
||||||
reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile).read();
|
reverseSuffixArray = new SuffixArrayReader(reverseSuffixArrayFile).read();
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<Alignment> align( SAMRecord read ) {
|
public List<Alignment> align( SAMRecord read ) {
|
||||||
byte[] forwardBases = read.getReadBases();
|
byte[] uncomplementedBases = read.getReadBases();
|
||||||
byte[] reverseBases = BaseUtils.simpleReverseComplement(forwardBases);
|
byte[] complementedBases = BaseUtils.reverse(BaseUtils.simpleReverseComplement(uncomplementedBases));
|
||||||
|
|
||||||
List<LowerBound> forwardLowerBounds = LowerBound.create(forwardBases,forwardBWT);
|
List<LowerBound> forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT);
|
||||||
List<LowerBound> reverseLowerBounds = LowerBound.create(reverseBases,reverseBWT);
|
List<LowerBound> reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT);
|
||||||
|
|
||||||
//for( int i = 0; i < forwardLowerBounds.size(); i++ )
|
/*
|
||||||
// System.out.printf("ForwardBWT: lb[%d] = %s%n",i,forwardLowerBounds.get(i));
|
for( int i = 0; i < forwardLowerBounds.size(); i++ )
|
||||||
//for( int i = 0; i < reverseLowerBounds.size(); i++ )
|
System.out.printf("ForwardBWT: lb[%d] = %s%n",i,forwardLowerBounds.get(i));
|
||||||
// System.out.printf("ReverseBWT: lb[%d] = %s%n",i,reverseLowerBounds.get(i));
|
for( int i = 0; i < reverseLowerBounds.size(); i++ )
|
||||||
|
System.out.printf("ReverseBWT: lb[%d] = %s%n",i,reverseLowerBounds.get(i));
|
||||||
|
*/
|
||||||
|
|
||||||
PriorityQueue<BWAAlignment> alignments = new PriorityQueue<BWAAlignment>();
|
PriorityQueue<BWAAlignment> alignments = new PriorityQueue<BWAAlignment>();
|
||||||
|
|
||||||
// Create a fictional initial alignment, with the position just off the end of the read, and the limits
|
// Create a fictional initial alignment, with the position just off the end of the read, and the limits
|
||||||
// set as the entire BWT.
|
// set as the entire BWT.
|
||||||
alignments.add(createSeedAlignment(forwardBWT));
|
alignments.add(createSeedAlignment(forwardBWT));
|
||||||
//alignments.add(createSeedAlignment(reverseBWT));
|
alignments.add(createSeedAlignment(reverseBWT));
|
||||||
|
|
||||||
while(!alignments.isEmpty()) {
|
while(!alignments.isEmpty()) {
|
||||||
BWAAlignment alignment = alignments.remove();
|
BWAAlignment alignment = alignments.remove();
|
||||||
|
|
||||||
byte[] bases = alignment.negativeStrand ? forwardBases : reverseBases;
|
byte[] bases = alignment.negativeStrand ? complementedBases : uncomplementedBases;
|
||||||
BWT bwt = alignment.negativeStrand ? reverseBWT : forwardBWT;
|
BWT bwt = alignment.negativeStrand ? forwardBWT : reverseBWT;
|
||||||
List<LowerBound> lowerBounds = alignment.negativeStrand ? forwardLowerBounds : reverseLowerBounds;
|
List<LowerBound> lowerBounds = alignment.negativeStrand ? reverseLowerBounds : forwardLowerBounds;
|
||||||
|
|
||||||
// Done with this particular alignment.
|
// Done with this particular alignment.
|
||||||
if(alignment.position == read.getReadLength()-1) {
|
if(alignment.position == read.getReadLength()-1) {
|
||||||
alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()-alignment.gapOpens-alignment.gapExtensions) + 1;
|
if( !alignment.isNegativeStrand() )
|
||||||
|
alignment.alignmentStart = reverseBWT.length() - (reverseSuffixArray.get(alignment.loBound)+read.getReadLength()-alignment.gapOpens-alignment.gapExtensions) + 1;
|
||||||
|
else
|
||||||
|
alignment.alignmentStart = forwardSuffixArray.get(alignment.loBound) + 1;
|
||||||
return Collections.<Alignment>singletonList(alignment);
|
return Collections.<Alignment>singletonList(alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -211,6 +222,7 @@ public class BWAAligner implements Aligner {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create new alignments representing a deletion a this point in the read.
|
* Create new alignments representing a deletion a this point in the read.
|
||||||
|
* @param bwt source BWT for inferring deletion info.
|
||||||
* @param alignment Alignment from which to derive the deletion.
|
* @param alignment Alignment from which to derive the deletion.
|
||||||
* @return New alignments reflecting all possible deletions.
|
* @return New alignments reflecting all possible deletions.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue