Support 'N's and other mismatch characters in the reference.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1721 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
08e8d2183a
commit
3b79f9eddc
|
|
@ -20,9 +20,6 @@ import net.sf.samtools.SAMFileReader;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class AlignerTestHarness {
|
||||
private static BWT bwt;
|
||||
private static SuffixArray suffixArray;
|
||||
|
||||
public static void main( String argv[] ) throws FileNotFoundException {
|
||||
if( argv.length != 5 ) {
|
||||
System.out.println("PerfectAlignerTestHarness <fasta> <bwt> <rbwt> <sa> <bam>");
|
||||
|
|
@ -49,29 +46,11 @@ public class AlignerTestHarness {
|
|||
|
||||
for(SAMRecord read: reader) {
|
||||
count++;
|
||||
//if( count > 5000 ) break;
|
||||
//if( count != 10959 ) continue;
|
||||
//if( count > 39 ) break;
|
||||
//if( count != 39 ) continue;
|
||||
//if( !read.getReadName().endsWith("1507:1636#0") )
|
||||
// continue;
|
||||
|
||||
boolean skipRead = false;
|
||||
|
||||
/*
|
||||
for( CigarElement cigarElement: read.getCigar().getCigarElements() ) {
|
||||
if( cigarElement.getOperator() != CigarOperator.M ) {
|
||||
System.out.printf("Skipping read %s because it features indels%n", read.getReadName());
|
||||
skipRead = true;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
if(read.getReadString().indexOf("N") >= 0) {
|
||||
System.out.printf("Skipping read %s because it contains Ns%n", read.getReadName());
|
||||
skipRead = true;
|
||||
}
|
||||
|
||||
if(skipRead) continue;
|
||||
|
||||
List<Alignment> alignments = aligner.align(read);
|
||||
if(alignments.size() == 0 )
|
||||
throw new StingException(String.format("Unable to align read %s to reference; count = %d",read.getReadName(),count));
|
||||
|
|
|
|||
|
|
@ -54,9 +54,14 @@ public class LowerBound {
|
|||
int loIndex = 0, hiIndex = bwt.length(), mismatches = 0;
|
||||
for( int i = bases.length-1; i >= 0; i-- ) {
|
||||
Base base = Base.fromASCII(bases[i]);
|
||||
loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1;
|
||||
hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex);
|
||||
if( loIndex > hiIndex ) {
|
||||
|
||||
// Ignore non-ACGT bases.
|
||||
if( base != null ) {
|
||||
loIndex = bwt.counts(base) + bwt.occurrences(base,loIndex-1) + 1;
|
||||
hiIndex = bwt.counts(base) + bwt.occurrences(base,hiIndex);
|
||||
}
|
||||
|
||||
if( base == null || loIndex > hiIndex ) {
|
||||
loIndex = 0;
|
||||
hiIndex = bwt.length();
|
||||
mismatches++;
|
||||
|
|
|
|||
Loading…
Reference in New Issue