2009-09-18 07:28:47 +08:00
package org.broadinstitute.sting.alignment.bwa ;
2009-09-15 05:54:56 +08:00
2009-09-18 07:28:47 +08:00
import org.broadinstitute.sting.alignment.bwa.bwt.* ;
2009-09-23 03:05:10 +08:00
import org.broadinstitute.sting.alignment.Aligner ;
import org.broadinstitute.sting.alignment.Alignment ;
2009-09-24 07:44:59 +08:00
import org.broadinstitute.sting.utils.StingException ;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile ;
2009-09-15 05:54:56 +08:00
import java.io.File ;
import java.io.FileNotFoundException ;
2009-09-23 03:05:10 +08:00
import java.util.List ;
2009-09-15 05:54:56 +08:00
2009-09-23 03:05:10 +08:00
import net.sf.samtools.SAMRecord ;
import net.sf.samtools.SAMFileReader ;
2009-09-15 05:54:56 +08:00
/ * *
* A test harness to ensure that the perfect aligner works .
*
* @author mhanna
* @version 0.1
* /
2009-09-23 03:05:10 +08:00
public class AlignerTestHarness {
2009-09-15 05:54:56 +08:00
public static void main ( String argv [ ] ) throws FileNotFoundException {
2009-09-24 07:44:59 +08:00
if ( argv . length ! = 5 ) {
System . out . println ( "PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <bam>" ) ;
2009-09-15 05:54:56 +08:00
System . exit ( 1 ) ;
}
File referenceFile = new File ( argv [ 0 ] ) ;
File bwtFile = new File ( argv [ 1 ] ) ;
2009-09-23 03:05:10 +08:00
File rbwtFile = new File ( argv [ 2 ] ) ;
2009-09-24 07:44:59 +08:00
File reverseSuffixArrayFile = new File ( argv [ 3 ] ) ;
File bamFile = new File ( argv [ 4 ] ) ;
2009-09-23 03:05:10 +08:00
2009-09-24 07:44:59 +08:00
align ( referenceFile , bwtFile , rbwtFile , reverseSuffixArrayFile , bamFile ) ;
2009-09-23 03:05:10 +08:00
}
2009-09-15 05:54:56 +08:00
2009-09-24 07:44:59 +08:00
private static void align ( File referenceFile , File bwtFile , File rbwtFile , File reverseSuffixArrayFile , File bamFile ) throws FileNotFoundException {
2009-09-23 03:05:10 +08:00
BWT bwt = new BWTReader ( bwtFile ) . read ( ) ;
2009-09-24 07:44:59 +08:00
Aligner aligner = new BWAAligner ( bwtFile , rbwtFile , reverseSuffixArrayFile ) ;
2009-09-23 03:05:10 +08:00
int count = 0 ;
SAMFileReader reader = new SAMFileReader ( bamFile ) ;
reader . setValidationStringency ( SAMFileReader . ValidationStringency . SILENT ) ;
for ( SAMRecord read : reader ) {
count + + ;
2009-09-25 05:41:30 +08:00
//if( count > 39 ) break;
//if( count != 39 ) continue;
2009-09-24 07:44:59 +08:00
//if( !read.getReadName().endsWith("1507:1636#0") )
// continue;
List < Alignment > alignments = aligner . align ( read ) ;
if ( alignments . size ( ) = = 0 )
2009-09-25 05:03:02 +08:00
throw new StingException ( String . format ( "Unable to align read %s to reference; count = %d" , read . getReadName ( ) , count ) ) ;
2009-09-24 07:44:59 +08:00
Alignment alignment = alignments . get ( 0 ) ;
2009-09-25 05:03:02 +08:00
System . out . printf ( "%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n" , read . getReadName ( ) , alignment . getAlignmentStart ( ) , alignment . getMismatches ( ) , alignment . getGapOpens ( ) , alignment . getGapExtensions ( ) ) ;
2009-09-24 07:44:59 +08:00
if ( read . getAlignmentStart ( ) ! = alignment . getAlignmentStart ( ) ) {
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile ( referenceFile ) ;
String expectedRef = new String ( reference . getSubsequenceAt ( reference . getSequenceDictionary ( ) . getSequences ( ) . get ( 0 ) . getSequenceName ( ) , read . getAlignmentStart ( ) , read . getAlignmentStart ( ) + read . getReadLength ( ) - 1 ) . getBases ( ) ) ;
int expectedMismatches = 0 ;
for ( int i = 0 ; i < read . getReadLength ( ) ; i + + ) {
if ( read . getReadBases ( ) [ i ] ! = expectedRef . charAt ( i ) )
expectedMismatches + + ;
}
String alignedRef = new String ( reference . getSubsequenceAt ( reference . getSequenceDictionary ( ) . getSequences ( ) . get ( 0 ) . getSequenceName ( ) , alignments . get ( 0 ) . getAlignmentStart ( ) , alignments . get ( 0 ) . getAlignmentStart ( ) + read . getReadLength ( ) - 1 ) . getBases ( ) ) ;
int actualMismatches = 0 ;
for ( int i = 0 ; i < read . getReadLength ( ) ; i + + ) {
if ( read . getReadBases ( ) [ i ] ! = expectedRef . charAt ( i ) )
actualMismatches + + ;
}
if ( expectedMismatches ! = actualMismatches ) {
System . out . printf ( "read = %s%n" , read . getReadString ( ) ) ;
System . out . printf ( "expected ref = %s%n" , expectedRef ) ;
System . out . printf ( "actual ref = %s%n" , alignedRef ) ;
throw new StingException ( String . format ( "Read %s was placed at incorrect location; target alignment = %d; actual alignment = %d%n" , read . getReadName ( ) , read . getAlignmentStart ( ) , alignment . getAlignmentStart ( ) ) ) ;
}
2009-09-15 05:54:56 +08:00
}
2009-09-24 07:44:59 +08:00
if ( count % 1000 = = 0 )
System . out . printf ( "%d reads examined.%n" , count ) ;
2009-09-15 05:54:56 +08:00
}
2009-09-23 03:05:10 +08:00
2009-09-24 07:44:59 +08:00
System . out . printf ( "%d reads examined.%n" , count ) ;
}
2009-09-23 03:05:10 +08:00
2009-09-15 05:54:56 +08:00
}