From 3b79f9eddcae83769c4a17a1bcfd75388e4dd8ae Mon Sep 17 00:00:00 2001 From: hanna Date: Thu, 24 Sep 2009 21:41:30 +0000 Subject: [PATCH] Support 'N's and other mismatch characters in the reference. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1721 348d0f76-0448-11de-a6fe-93d51630548a --- .../alignment/bwa/AlignerTestHarness.java | 25 ++----------------- .../sting/alignment/bwa/LowerBound.java | 11 +++++--- 2 files changed, 10 insertions(+), 26 deletions(-) diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java index 4e11e4e2c..43aad684e 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/AlignerTestHarness.java @@ -20,9 +20,6 @@ import net.sf.samtools.SAMFileReader; * @version 0.1 */ public class AlignerTestHarness { - private static BWT bwt; - private static SuffixArray suffixArray; - public static void main( String argv[] ) throws FileNotFoundException { if( argv.length != 5 ) { System.out.println("PerfectAlignerTestHarness "); @@ -49,29 +46,11 @@ public class AlignerTestHarness { for(SAMRecord read: reader) { count++; - //if( count > 5000 ) break; - //if( count != 10959 ) continue; + //if( count > 39 ) break; + //if( count != 39 ) continue; //if( !read.getReadName().endsWith("1507:1636#0") ) // continue; - boolean skipRead = false; - - /* - for( CigarElement cigarElement: read.getCigar().getCigarElements() ) { - if( cigarElement.getOperator() != CigarOperator.M ) { - System.out.printf("Skipping read %s because it features indels%n", read.getReadName()); - skipRead = true; - } - } - */ - - if(read.getReadString().indexOf("N") >= 0) { - System.out.printf("Skipping read %s because it contains Ns%n", read.getReadName()); - skipRead = true; - } - - if(skipRead) continue; - List alignments = aligner.align(read); if(alignments.size() == 0 ) throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count)); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java b/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java index 82e654ce1..cd091b69a 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/LowerBound.java @@ -54,9 +54,14 @@ public class LowerBound { int loIndex = 0, hiIndex = bwt.length(), mismatches = 0; for( int i = bases.length-1; i >= 0; i-- ) { Base base = Base.fromASCII(bases[i]); - loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1; - hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex); - if( loIndex > hiIndex ) { + + // Ignore non-ACGT bases. + if( base != null ) { + loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1; + hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex); + } + + if( base == null || loIndex > hiIndex ) { loIndex = 0; hiIndex = bwt.length(); mismatches++;