163 lines
7.8 KiB
Java
163 lines
7.8 KiB
Java
package org.broadinstitute.sting.alignment.bwa;
|
|
|
|
import org.broadinstitute.sting.alignment.Aligner;
|
|
import org.broadinstitute.sting.alignment.Alignment;
|
|
import org.broadinstitute.sting.utils.StingException;
|
|
import org.broadinstitute.sting.utils.BaseUtils;
|
|
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
|
|
|
import java.io.File;
|
|
import java.io.FileNotFoundException;
|
|
import java.util.List;
|
|
|
|
import net.sf.samtools.*;
|
|
|
|
/**
|
|
* A test harness to ensure that the perfect aligner works.
|
|
*
|
|
* @author mhanna
|
|
* @version 0.1
|
|
*/
|
|
public class AlignerTestHarness {
|
|
public static void main( String argv[] ) throws FileNotFoundException {
|
|
if( argv.length != 6 ) {
|
|
System.out.println("PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <rsa> <bam>");
|
|
System.exit(1);
|
|
}
|
|
|
|
File referenceFile = new File(argv[0]);
|
|
File bwtFile = new File(argv[1]);
|
|
File rbwtFile = new File(argv[2]);
|
|
File suffixArrayFile = new File(argv[3]);
|
|
File reverseSuffixArrayFile = new File(argv[4]);
|
|
File bamFile = new File(argv[5]);
|
|
|
|
align(referenceFile,bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile,bamFile);
|
|
}
|
|
|
|
private static void align(File referenceFile, File bwtFile, File rbwtFile, File suffixArrayFile, File reverseSuffixArrayFile, File bamFile) throws FileNotFoundException {
|
|
Aligner aligner = new BWAAligner(bwtFile,rbwtFile,suffixArrayFile,reverseSuffixArrayFile);
|
|
int count = 0;
|
|
|
|
SAMFileReader reader = new SAMFileReader(bamFile);
|
|
reader.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
|
|
|
|
int mismatches = 0;
|
|
int failures = 0;
|
|
|
|
for(SAMRecord read: reader) {
|
|
count++;
|
|
if( count > 100000 ) break;
|
|
//if( count < 366000 ) continue;
|
|
//if( count != 2 ) continue;
|
|
//if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") )
|
|
// continue;
|
|
//if( !read.getReadName().endsWith("SL-XBC:1:36:30:1926#0") )
|
|
// continue;
|
|
//if( !read.getReadName().endsWith("SL-XBC:1:60:1342:1340#0") )
|
|
// continue;
|
|
|
|
SAMRecord alignmentCleaned = null;
|
|
try {
|
|
alignmentCleaned = (SAMRecord)read.clone();
|
|
}
|
|
catch( CloneNotSupportedException ex ) {
|
|
throw new StingException("SAMRecord clone not supported", ex);
|
|
}
|
|
|
|
if( alignmentCleaned.getReadNegativeStrandFlag() )
|
|
alignmentCleaned.setReadBases(BaseUtils.simpleReverseComplement(alignmentCleaned.getReadBases()));
|
|
|
|
alignmentCleaned.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
|
|
alignmentCleaned.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
|
|
alignmentCleaned.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
|
|
alignmentCleaned.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
|
|
|
|
// Clear everything except flags pertaining to pairing and set 'unmapped' status to true.
|
|
alignmentCleaned.setFlags(alignmentCleaned.getFlags() & 0x00A1 | 0x000C);
|
|
|
|
List<Alignment> alignments = aligner.align(alignmentCleaned);
|
|
if(alignments.size() == 0 ) {
|
|
//throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
|
|
System.out.printf("Unable to align read %s to reference; count = %d%n",read.getReadName(),count);
|
|
failures++;
|
|
}
|
|
|
|
Alignment foundAlignment = null;
|
|
for( Alignment alignment: alignments ) {
|
|
if( read.getReadNegativeStrandFlag() != alignment.isNegativeStrand() )
|
|
continue;
|
|
if( read.getAlignmentStart() != alignment.getAlignmentStart() )
|
|
continue;
|
|
|
|
foundAlignment = alignment;
|
|
}
|
|
|
|
if( foundAlignment != null ) {
|
|
//System.out.printf("%s: Aligned read to reference at position %d with %d mismatches, %d gap opens, and %d gap extensions.%n", read.getReadName(), foundAlignment.getAlignmentStart(), foundAlignment.getMismatches(), foundAlignment.getGapOpens(), foundAlignment.getGapExtensions());
|
|
}
|
|
else {
|
|
System.out.printf("Error aligning read %s%n", read.getReadName());
|
|
|
|
mismatches++;
|
|
|
|
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
|
|
|
System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION),
|
|
read.getAlignmentStart(),
|
|
read.getReadNegativeStrandFlag());
|
|
int numDeletions = numDeletionsInCigar(read.getCigar());
|
|
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases());
|
|
System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION));
|
|
|
|
for( Alignment alignment: alignments ) {
|
|
System.out.println();
|
|
|
|
Cigar cigar = ((BWAAlignment)alignment).getCigar();
|
|
|
|
System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION));
|
|
|
|
int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
|
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases());
|
|
System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION),
|
|
alignment.getAlignmentStart(),
|
|
alignment.isNegativeStrand());
|
|
}
|
|
|
|
//throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count));
|
|
}
|
|
|
|
|
|
if( count % 1000 == 0 )
|
|
System.out.printf("%d reads examined.%n",count);
|
|
}
|
|
|
|
System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures);
|
|
}
|
|
|
|
private static String formatBasesBasedOnCigar( String bases, Cigar cigar, CigarOperator toBlank ) {
|
|
StringBuilder formatted = new StringBuilder();
|
|
int readIndex = 0;
|
|
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
|
if(cigarElement.getOperator() == toBlank) {
|
|
int number = cigarElement.getLength();
|
|
while( number-- > 0 ) formatted.append(' ');
|
|
}
|
|
else {
|
|
int number = cigarElement.getLength();
|
|
while( number-- > 0 ) formatted.append(bases.charAt(readIndex++));
|
|
}
|
|
}
|
|
return formatted.toString();
|
|
}
|
|
|
|
private static int numDeletionsInCigar( Cigar cigar ) {
|
|
int numDeletions = 0;
|
|
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
|
if(cigarElement.getOperator() == CigarOperator.DELETION)
|
|
numDeletions += cigarElement.getLength();
|
|
}
|
|
return numDeletions;
|
|
}
|
|
}
|