2009-09-18 07:28:47 +08:00
package org.broadinstitute.sting.alignment.bwa ;
2009-09-15 05:54:56 +08:00
2009-09-23 03:05:10 +08:00
import org.broadinstitute.sting.alignment.Aligner ;
import org.broadinstitute.sting.alignment.Alignment ;
2009-09-24 07:44:59 +08:00
import org.broadinstitute.sting.utils.StingException ;
2009-10-01 02:10:26 +08:00
import org.broadinstitute.sting.utils.BaseUtils ;
2009-09-24 07:44:59 +08:00
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile ;
2009-09-15 05:54:56 +08:00
import java.io.File ;
import java.io.FileNotFoundException ;
2009-09-23 03:05:10 +08:00
import java.util.List ;
2009-09-15 05:54:56 +08:00
2009-09-23 03:05:10 +08:00
import net.sf.samtools.SAMRecord ;
import net.sf.samtools.SAMFileReader ;
2009-09-15 05:54:56 +08:00
/ * *
* A test harness to ensure that the perfect aligner works .
*
* @author mhanna
* @version 0.1
* /
2009-09-23 03:05:10 +08:00
public class AlignerTestHarness {
2009-09-15 05:54:56 +08:00
public static void main ( String argv [ ] ) throws FileNotFoundException {
2009-10-01 02:10:26 +08:00
if ( argv . length ! = 6 ) {
System . out . println ( "PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <rsa> <bam>" ) ;
2009-09-15 05:54:56 +08:00
System . exit ( 1 ) ;
}
File referenceFile = new File ( argv [ 0 ] ) ;
File bwtFile = new File ( argv [ 1 ] ) ;
2009-09-23 03:05:10 +08:00
File rbwtFile = new File ( argv [ 2 ] ) ;
2009-10-01 02:10:26 +08:00
File suffixArrayFile = new File ( argv [ 3 ] ) ;
File reverseSuffixArrayFile = new File ( argv [ 4 ] ) ;
File bamFile = new File ( argv [ 5 ] ) ;
2009-09-23 03:05:10 +08:00
2009-10-01 02:10:26 +08:00
align ( referenceFile , bwtFile , rbwtFile , suffixArrayFile , reverseSuffixArrayFile , bamFile ) ;
2009-09-23 03:05:10 +08:00
}
2009-09-15 05:54:56 +08:00
2009-10-01 02:10:26 +08:00
private static void align ( File referenceFile , File bwtFile , File rbwtFile , File suffixArrayFile , File reverseSuffixArrayFile , File bamFile ) throws FileNotFoundException {
Aligner aligner = new BWAAligner ( bwtFile , rbwtFile , suffixArrayFile , reverseSuffixArrayFile ) ;
2009-09-23 03:05:10 +08:00
int count = 0 ;
SAMFileReader reader = new SAMFileReader ( bamFile ) ;
reader . setValidationStringency ( SAMFileReader . ValidationStringency . SILENT ) ;
2009-10-05 02:20:20 +08:00
int mismatches = 0 ;
int failures = 0 ;
2009-10-04 05:55:18 +08:00
2009-09-23 03:05:10 +08:00
for ( SAMRecord read : reader ) {
count + + ;
2009-10-05 02:20:20 +08:00
//if( count > 100000 ) break;
2009-10-04 05:55:18 +08:00
//if( count != 2 ) continue;
2009-10-05 02:20:20 +08:00
//if( !read.getReadName().endsWith("SL-XBC:1:90:15:1280#0") )
2009-09-24 07:44:59 +08:00
// continue;
2009-10-01 02:10:26 +08:00
SAMRecord alignmentCleaned = null ;
try {
alignmentCleaned = ( SAMRecord ) read . clone ( ) ;
}
catch ( CloneNotSupportedException ex ) {
throw new StingException ( "SAMRecord clone not supported" , ex ) ;
}
if ( alignmentCleaned . getReadNegativeStrandFlag ( ) )
alignmentCleaned . setReadBases ( BaseUtils . simpleReverseComplement ( alignmentCleaned . getReadBases ( ) ) ) ;
alignmentCleaned . setReferenceIndex ( SAMRecord . NO_ALIGNMENT_REFERENCE_INDEX ) ;
alignmentCleaned . setAlignmentStart ( SAMRecord . NO_ALIGNMENT_START ) ;
alignmentCleaned . setMappingQuality ( SAMRecord . NO_MAPPING_QUALITY ) ;
alignmentCleaned . setCigarString ( SAMRecord . NO_ALIGNMENT_CIGAR ) ;
// Clear everything except flags pertaining to pairing and set 'unmapped' status to true.
alignmentCleaned . setFlags ( alignmentCleaned . getFlags ( ) & 0x00A1 | 0x000C ) ;
List < Alignment > alignments = aligner . align ( alignmentCleaned ) ;
2009-10-05 02:20:20 +08:00
if ( alignments . size ( ) = = 0 ) {
//throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
System . out . printf ( "Unable to align read %s to reference; count = %d%n" , read . getReadName ( ) , count ) ;
failures + + ;
}
2009-09-24 07:44:59 +08:00
2009-10-05 02:20:20 +08:00
Alignment foundAlignment = null ;
for ( Alignment alignment : alignments ) {
if ( read . getReadNegativeStrandFlag ( ) ! = alignment . isNegativeStrand ( ) )
continue ;
if ( read . getAlignmentStart ( ) ! = alignment . getAlignmentStart ( ) )
continue ;
2009-09-25 05:03:02 +08:00
2009-10-05 02:20:20 +08:00
foundAlignment = alignment ;
}
2009-09-25 05:03:02 +08:00
2009-10-05 02:20:20 +08:00
if ( foundAlignment ! = null ) {
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions());
2009-10-04 05:55:18 +08:00
}
2009-10-05 02:20:20 +08:00
else {
mismatches + + ;
//throw new StingException(String.format("Read %s was placed at incorrect location; target alignment = %d; actual alignment = %d; count = %d%n",read.getReadName(),read.getAlignmentStart(),alignment.getAlignmentStart(),count));
2009-10-01 02:10:26 +08:00
2009-09-24 07:44:59 +08:00
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile ( referenceFile ) ;
String expectedRef = new String ( reference . getSubsequenceAt ( reference . getSequenceDictionary ( ) . getSequences ( ) . get ( 0 ) . getSequenceName ( ) , read . getAlignmentStart ( ) , read . getAlignmentStart ( ) + read . getReadLength ( ) - 1 ) . getBases ( ) ) ;
2009-10-05 02:20:20 +08:00
System . out . printf ( "read = %s, strand = %b%n" , read . getReadString ( ) , read . getReadNegativeStrandFlag ( ) ) ;
System . out . printf ( "expected ref = %s%n" , expectedRef ) ;
for ( Alignment alignment : alignments ) {
String alignedRef = new String ( reference . getSubsequenceAt ( reference . getSequenceDictionary ( ) . getSequences ( ) . get ( 0 ) . getSequenceName ( ) , alignments . get ( 0 ) . getAlignmentStart ( ) , alignments . get ( 0 ) . getAlignmentStart ( ) + read . getReadLength ( ) - 1 ) . getBases ( ) ) ;
System . out . printf ( "actual ref = %s, strand = %b%n" , alignedRef , read . getReadNegativeStrandFlag ( ) ) ;
//System.out.printf("(reversed) = %s, strand = %b%n", BaseUtils.simpleReverseComplement(alignedRef), !read.getReadNegativeStrandFlag());
2009-09-24 07:44:59 +08:00
}
2009-09-15 05:54:56 +08:00
}
2009-10-05 02:20:20 +08:00
2009-09-24 07:44:59 +08:00
if ( count % 1000 = = 0 )
System . out . printf ( "%d reads examined.%n" , count ) ;
2009-09-15 05:54:56 +08:00
}
2009-09-23 03:05:10 +08:00
2009-10-05 02:20:20 +08:00
System . out . printf ( "%d reads examined; %d mismatches; %d failures.%n" , count , mismatches , failures ) ;
2009-09-24 07:44:59 +08:00
}
2009-09-23 03:05:10 +08:00
2009-09-15 05:54:56 +08:00
}