Support 'N's and other mismatch characters in the reference.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1721 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-09-24 21:41:30 +00:00
parent 08e8d2183a
commit 3b79f9eddc
2 changed files with 10 additions and 26 deletions

View File

@ -20,9 +20,6 @@ import net.sf.samtools.SAMFileReader;
* @version 0.1
*/
public class AlignerTestHarness {
private static BWT bwt;
private static SuffixArray suffixArray;
public static void main( String argv[] ) throws FileNotFoundException {
if( argv.length != 5 ) {
System.out.println("PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <bam>");
@ -49,29 +46,11 @@ public class AlignerTestHarness {
for(SAMRecord read: reader) {
count++;
//if( count > 5000 ) break;
//if( count != 10959 ) continue;
//if( count > 39 ) break;
//if( count != 39 ) continue;
//if( !read.getReadName().endsWith("1507:1636#0") )
// continue;
boolean skipRead = false;
/*
for( CigarElement cigarElement: read.getCigar().getCigarElements() ) {
if( cigarElement.getOperator() != CigarOperator.M ) {
System.out.printf("Skipping read %s because it features indels%n", read.getReadName());
skipRead = true;
}
}
*/
if(read.getReadString().indexOf("N") >= 0) {
System.out.printf("Skipping read %s because it contains Ns%n", read.getReadName());
skipRead = true;
}
if(skipRead) continue;
List<Alignment> alignments = aligner.align(read);
if(alignments.size() == 0 )
throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));

View File

@ -54,9 +54,14 @@ public class LowerBound {
int loIndex = 0, hiIndex = bwt.length(), mismatches = 0;
for( int i = bases.length-1; i >= 0; i-- ) {
Base base = Base.fromASCII(bases[i]);
loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1;
hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex);
if( loIndex > hiIndex ) {
// Ignore non-ACGT bases.
if( base != null ) {
loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1;
hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex);
}
if( base == null || loIndex > hiIndex ) {
loIndex = 0;
hiIndex = bwt.length();
mismatches++;