2009-09-18 07:28:47 +08:00
package org.broadinstitute.sting.alignment.bwa ;
2009-09-15 05:54:56 +08:00
2009-09-23 03:05:10 +08:00
import org.broadinstitute.sting.alignment.Aligner ;
import org.broadinstitute.sting.alignment.Alignment ;
2009-09-24 07:44:59 +08:00
import org.broadinstitute.sting.utils.StingException ;
2009-10-01 02:10:26 +08:00
import org.broadinstitute.sting.utils.BaseUtils ;
2009-09-24 07:44:59 +08:00
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile ;
2009-09-15 05:54:56 +08:00
import java.io.File ;
import java.io.FileNotFoundException ;
2009-09-23 03:05:10 +08:00
import java.util.List ;
2009-09-15 05:54:56 +08:00
2009-10-09 03:45:30 +08:00
import net.sf.samtools.* ;
2009-09-15 05:54:56 +08:00
/ * *
* A test harness to ensure that the perfect aligner works .
*
* @author mhanna
* @version 0.1
* /
2009-09-23 03:05:10 +08:00
public class AlignerTestHarness {
2009-09-15 05:54:56 +08:00
public static void main ( String argv [ ] ) throws FileNotFoundException {
2009-10-01 02:10:26 +08:00
if ( argv . length ! = 6 ) {
System . out . println ( "PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <rsa> <bam>" ) ;
2009-09-15 05:54:56 +08:00
System . exit ( 1 ) ;
}
File referenceFile = new File ( argv [ 0 ] ) ;
File bwtFile = new File ( argv [ 1 ] ) ;
2009-09-23 03:05:10 +08:00
File rbwtFile = new File ( argv [ 2 ] ) ;
2009-10-01 02:10:26 +08:00
File suffixArrayFile = new File ( argv [ 3 ] ) ;
File reverseSuffixArrayFile = new File ( argv [ 4 ] ) ;
File bamFile = new File ( argv [ 5 ] ) ;
2009-09-23 03:05:10 +08:00
2009-10-01 02:10:26 +08:00
align ( referenceFile , bwtFile , rbwtFile , suffixArrayFile , reverseSuffixArrayFile , bamFile ) ;
2009-09-23 03:05:10 +08:00
}
2009-09-15 05:54:56 +08:00
2009-10-01 02:10:26 +08:00
private static void align ( File referenceFile , File bwtFile , File rbwtFile , File suffixArrayFile , File reverseSuffixArrayFile , File bamFile ) throws FileNotFoundException {
Aligner aligner = new BWAAligner ( bwtFile , rbwtFile , suffixArrayFile , reverseSuffixArrayFile ) ;
2009-09-23 03:05:10 +08:00
int count = 0 ;
SAMFileReader reader = new SAMFileReader ( bamFile ) ;
reader . setValidationStringency ( SAMFileReader . ValidationStringency . SILENT ) ;
2009-10-05 02:20:20 +08:00
int mismatches = 0 ;
int failures = 0 ;
2009-10-04 05:55:18 +08:00
2009-09-23 03:05:10 +08:00
for ( SAMRecord read : reader ) {
count + + ;
2009-10-13 23:50:04 +08:00
//if( count > 100000 ) break;
//if( count < 366000 ) continue;
2009-10-04 05:55:18 +08:00
//if( count != 2 ) continue;
2009-10-09 03:45:30 +08:00
//if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") )
// continue;
//if( !read.getReadName().endsWith("SL-XBC:1:36:30:1926#0") )
2009-09-24 07:44:59 +08:00
// continue;
2009-10-01 02:10:26 +08:00
SAMRecord alignmentCleaned = null ;
try {
alignmentCleaned = ( SAMRecord ) read . clone ( ) ;
}
catch ( CloneNotSupportedException ex ) {
throw new StingException ( "SAMRecord clone not supported" , ex ) ;
}
if ( alignmentCleaned . getReadNegativeStrandFlag ( ) )
alignmentCleaned . setReadBases ( BaseUtils . simpleReverseComplement ( alignmentCleaned . getReadBases ( ) ) ) ;
alignmentCleaned . setReferenceIndex ( SAMRecord . NO_ALIGNMENT_REFERENCE_INDEX ) ;
alignmentCleaned . setAlignmentStart ( SAMRecord . NO_ALIGNMENT_START ) ;
alignmentCleaned . setMappingQuality ( SAMRecord . NO_MAPPING_QUALITY ) ;
alignmentCleaned . setCigarString ( SAMRecord . NO_ALIGNMENT_CIGAR ) ;
// Clear everything except flags pertaining to pairing and set 'unmapped' status to true.
alignmentCleaned . setFlags ( alignmentCleaned . getFlags ( ) & 0x00A1 | 0x000C ) ;
List < Alignment > alignments = aligner . align ( alignmentCleaned ) ;
2009-10-05 02:20:20 +08:00
if ( alignments . size ( ) = = 0 ) {
//throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
System . out . printf ( "Unable to align read %s to reference; count = %d%n" , read . getReadName ( ) , count ) ;
failures + + ;
}
2009-09-24 07:44:59 +08:00
2009-10-05 02:20:20 +08:00
Alignment foundAlignment = null ;
for ( Alignment alignment : alignments ) {
if ( read . getReadNegativeStrandFlag ( ) ! = alignment . isNegativeStrand ( ) )
continue ;
if ( read . getAlignmentStart ( ) ! = alignment . getAlignmentStart ( ) )
continue ;
2009-09-25 05:03:02 +08:00
2009-10-05 02:20:20 +08:00
foundAlignment = alignment ;
}
2009-09-25 05:03:02 +08:00
2009-10-05 02:20:20 +08:00
if ( foundAlignment ! = null ) {
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions());
2009-10-04 05:55:18 +08:00
}
2009-10-05 02:20:20 +08:00
else {
2009-10-13 23:50:04 +08:00
System . out . printf ( "Error aligning read %s%n" , read . getReadName ( ) ) ;
2009-10-05 02:20:20 +08:00
mismatches + + ;
2009-10-01 02:10:26 +08:00
2009-09-24 07:44:59 +08:00
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile ( referenceFile ) ;
2009-10-09 03:45:30 +08:00
System . out . printf ( "read = %s, position = %d, negative strand = %b%n" , formatBasesBasedOnCigar ( read . getReadString ( ) , read . getCigar ( ) , CigarOperator . DELETION ) ,
read . getAlignmentStart ( ) ,
read . getReadNegativeStrandFlag ( ) ) ;
int numDeletions = numDeletionsInCigar ( read . getCigar ( ) ) ;
String expectedRef = new String ( reference . getSubsequenceAt ( reference . getSequenceDictionary ( ) . getSequences ( ) . get ( 0 ) . getSequenceName ( ) , read . getAlignmentStart ( ) , read . getAlignmentStart ( ) + read . getReadLength ( ) + numDeletions - 1 ) . getBases ( ) ) ;
System . out . printf ( "expected ref = %s%n" , formatBasesBasedOnCigar ( expectedRef , read . getCigar ( ) , CigarOperator . INSERTION ) ) ;
2009-10-05 02:20:20 +08:00
for ( Alignment alignment : alignments ) {
2009-10-09 03:45:30 +08:00
System . out . println ( ) ;
Cigar cigar = ( ( BWAAlignment ) alignment ) . getCigar ( ) ;
System . out . printf ( "read = %s%n" , formatBasesBasedOnCigar ( read . getReadString ( ) , cigar , CigarOperator . DELETION ) ) ;
int deletionCount = ( ( BWAAlignment ) alignment ) . getNumberOfBasesMatchingState ( AlignmentState . DELETION ) ;
String alignedRef = new String ( reference . getSubsequenceAt ( reference . getSequenceDictionary ( ) . getSequences ( ) . get ( 0 ) . getSequenceName ( ) , alignment . getAlignmentStart ( ) , alignment . getAlignmentStart ( ) + read . getReadLength ( ) + deletionCount - 1 ) . getBases ( ) ) ;
System . out . printf ( "actual ref = %s, position = %d, negative strand = %b%n" , formatBasesBasedOnCigar ( alignedRef , cigar , CigarOperator . INSERTION ) ,
alignment . getAlignmentStart ( ) ,
alignment . isNegativeStrand ( ) ) ;
2009-09-24 07:44:59 +08:00
}
2009-10-09 03:45:30 +08:00
//throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count));
2009-09-15 05:54:56 +08:00
}
2009-10-05 02:20:20 +08:00
2009-09-24 07:44:59 +08:00
if ( count % 1000 = = 0 )
System . out . printf ( "%d reads examined.%n" , count ) ;
2009-09-15 05:54:56 +08:00
}
2009-09-23 03:05:10 +08:00
2009-10-05 02:20:20 +08:00
System . out . printf ( "%d reads examined; %d mismatches; %d failures.%n" , count , mismatches , failures ) ;
2009-09-24 07:44:59 +08:00
}
2009-09-23 03:05:10 +08:00
2009-10-09 03:45:30 +08:00
private static String formatBasesBasedOnCigar ( String bases , Cigar cigar , CigarOperator toBlank ) {
StringBuilder formatted = new StringBuilder ( ) ;
int readIndex = 0 ;
for ( CigarElement cigarElement : cigar . getCigarElements ( ) ) {
if ( cigarElement . getOperator ( ) = = toBlank ) {
int number = cigarElement . getLength ( ) ;
while ( number - - > 0 ) formatted . append ( ' ' ) ;
}
else {
int number = cigarElement . getLength ( ) ;
while ( number - - > 0 ) formatted . append ( bases . charAt ( readIndex + + ) ) ;
}
}
return formatted . toString ( ) ;
}
private static int numDeletionsInCigar ( Cigar cigar ) {
int numDeletions = 0 ;
for ( CigarElement cigarElement : cigar . getCigarElements ( ) ) {
if ( cigarElement . getOperator ( ) = = CigarOperator . DELETION )
numDeletions + = cigarElement . getLength ( ) ;
}
return numDeletions ;
}
2009-09-15 05:54:56 +08:00
}