Fixed 'visualization' of reads that didn't match bwa's alignments exactly.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1796 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
29ad6cd876
commit
95f24d671d
|
|
@ -10,8 +10,7 @@ import java.io.File;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.util.List;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.*;
|
||||
|
||||
/**
|
||||
* A test harness to ensure that the perfect aligner works.
|
||||
|
|
@ -48,9 +47,12 @@ public class AlignerTestHarness {
|
|||
|
||||
for(SAMRecord read: reader) {
|
||||
count++;
|
||||
//if( count > 100000 ) break;
|
||||
//if( count > 144000 ) break;
|
||||
if( count < 366000 ) continue;
|
||||
//if( count != 2 ) continue;
|
||||
//if( !read.getReadName().endsWith("SL-XBC:1:90:15:1280#0") )
|
||||
//if( !read.getReadName().endsWith("SL-XBC:1:82:506:404#0") )
|
||||
// continue;
|
||||
//if( !read.getReadName().endsWith("SL-XBC:1:36:30:1926#0") )
|
||||
// continue;
|
||||
|
||||
SAMRecord alignmentCleaned = null;
|
||||
|
|
@ -94,17 +96,31 @@ public class AlignerTestHarness {
|
|||
}
|
||||
else {
|
||||
mismatches++;
|
||||
//throw new StingException(String.format("Read %s was placed at incorrect location; target alignment = %d; actual alignment = %d; count = %d%n",read.getReadName(),read.getAlignmentStart(),alignment.getAlignmentStart(),count));
|
||||
|
||||
IndexedFastaSequenceFile reference = new IndexedFastaSequenceFile(referenceFile);
|
||||
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()-1).getBases());
|
||||
System.out.printf("read = %s, strand = %b%n", read.getReadString(), read.getReadNegativeStrandFlag());
|
||||
System.out.printf("expected ref = %s%n", expectedRef);
|
||||
|
||||
System.out.printf("read = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(read.getReadString(),read.getCigar(),CigarOperator.DELETION),
|
||||
read.getAlignmentStart(),
|
||||
read.getReadNegativeStrandFlag());
|
||||
int numDeletions = numDeletionsInCigar(read.getCigar());
|
||||
String expectedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),read.getAlignmentStart(),read.getAlignmentStart()+read.getReadLength()+numDeletions-1).getBases());
|
||||
System.out.printf("expected ref = %s%n", formatBasesBasedOnCigar(expectedRef,read.getCigar(),CigarOperator.INSERTION));
|
||||
|
||||
for( Alignment alignment: alignments ) {
|
||||
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignments.get(0).getAlignmentStart(),alignments.get(0).getAlignmentStart()+read.getReadLength()-1).getBases());
|
||||
System.out.printf("actual ref = %s, strand = %b%n", alignedRef, read.getReadNegativeStrandFlag());
|
||||
//System.out.printf("(reversed) = %s, strand = %b%n", BaseUtils.simpleReverseComplement(alignedRef), !read.getReadNegativeStrandFlag());
|
||||
System.out.println();
|
||||
|
||||
Cigar cigar = ((BWAAlignment)alignment).getCigar();
|
||||
|
||||
System.out.printf("read = %s%n", formatBasesBasedOnCigar(read.getReadString(),cigar,CigarOperator.DELETION));
|
||||
|
||||
int deletionCount = ((BWAAlignment)alignment).getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||
String alignedRef = new String(reference.getSubsequenceAt(reference.getSequenceDictionary().getSequences().get(0).getSequenceName(),alignment.getAlignmentStart(),alignment.getAlignmentStart()+read.getReadLength()+deletionCount-1).getBases());
|
||||
System.out.printf("actual ref = %s, position = %d, negative strand = %b%n", formatBasesBasedOnCigar(alignedRef,cigar,CigarOperator.INSERTION),
|
||||
alignment.getAlignmentStart(),
|
||||
alignment.isNegativeStrand());
|
||||
}
|
||||
|
||||
//throw new StingException(String.format("Read %s was placed at incorrect location; count = %d%n",read.getReadName(),count));
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -115,4 +131,28 @@ public class AlignerTestHarness {
|
|||
System.out.printf("%d reads examined; %d mismatches; %d failures.%n",count,mismatches,failures);
|
||||
}
|
||||
|
||||
private static String formatBasesBasedOnCigar( String bases, Cigar cigar, CigarOperator toBlank ) {
|
||||
StringBuilder formatted = new StringBuilder();
|
||||
int readIndex = 0;
|
||||
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
||||
if(cigarElement.getOperator() == toBlank) {
|
||||
int number = cigarElement.getLength();
|
||||
while( number-- > 0 ) formatted.append(' ');
|
||||
}
|
||||
else {
|
||||
int number = cigarElement.getLength();
|
||||
while( number-- > 0 ) formatted.append(bases.charAt(readIndex++));
|
||||
}
|
||||
}
|
||||
return formatted.toString();
|
||||
}
|
||||
|
||||
private static int numDeletionsInCigar( Cigar cigar ) {
|
||||
int numDeletions = 0;
|
||||
for(CigarElement cigarElement: cigar.getCigarElements()) {
|
||||
if(cigarElement.getOperator() == CigarOperator.DELETION)
|
||||
numDeletions += cigarElement.getLength();
|
||||
}
|
||||
return numDeletions;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,6 +4,11 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
|
||||
import java.util.Deque;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Iterator;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
|
||||
/**
|
||||
* Represents a sequence of matches.
|
||||
|
|
@ -37,6 +42,23 @@ public class AlignmentMatchSequence implements Cloneable {
|
|||
return copy;
|
||||
}
|
||||
|
||||
public Cigar convertToCigar(boolean negativeStrand) {
|
||||
Cigar cigar = new Cigar();
|
||||
Iterator<AlignmentMatchSequenceEntry> iterator = negativeStrand ? entries.descendingIterator() : entries.iterator();
|
||||
while( iterator.hasNext() ) {
|
||||
AlignmentMatchSequenceEntry entry = iterator.next();
|
||||
CigarOperator operator;
|
||||
switch( entry.getAlignmentState() ) {
|
||||
case MATCH_MISMATCH: operator = CigarOperator.MATCH_OR_MISMATCH; break;
|
||||
case INSERTION: operator = CigarOperator.INSERTION; break;
|
||||
case DELETION: operator = CigarOperator.DELETION; break;
|
||||
default: throw new StingException("convertToCigar: cannot process state: " + entry.getAlignmentState());
|
||||
}
|
||||
cigar.add( new CigarElement(entry.count,operator) );
|
||||
}
|
||||
return cigar;
|
||||
}
|
||||
|
||||
/**
|
||||
* All a new alignment of the given state.
|
||||
* @param state State to add to the sequence.
|
||||
|
|
|
|||
|
|
@ -126,9 +126,12 @@ public class BWAAligner implements Aligner {
|
|||
if( finalAlignment.isNegativeStrand() )
|
||||
finalAlignment.alignmentStart = forwardSuffixArray.get(bwtIndex) + 1;
|
||||
else {
|
||||
int sizeAlongReference = finalAlignment.getNumberOfBasesMatchingState(AlignmentState.MATCH_MISMATCH)+finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||
int sizeAlongReference = read.getReadLength() -
|
||||
finalAlignment.getNumberOfBasesMatchingState(AlignmentState.INSERTION) +
|
||||
finalAlignment.getNumberOfBasesMatchingState(AlignmentState.DELETION);
|
||||
finalAlignment.alignmentStart = reverseBWT.length() - reverseSuffixArray.get(bwtIndex) - sizeAlongReference + 1;
|
||||
}
|
||||
|
||||
successfulMatches.add(finalAlignment);
|
||||
|
||||
bestScore = Math.min(finalAlignment.getScore(),bestScore);
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.alignment.bwa;
|
|||
|
||||
import org.broadinstitute.sting.alignment.Alignment;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import net.sf.samtools.Cigar;
|
||||
|
||||
/**
|
||||
* An alignment object to be used incrementally as the BWA aligner
|
||||
|
|
@ -87,6 +88,10 @@ public class BWAAlignment implements Alignment, Cloneable {
|
|||
return negativeStrand;
|
||||
}
|
||||
|
||||
public Cigar getCigar() {
|
||||
return alignmentMatchSequence.convertToCigar(isNegativeStrand());
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the current state of this alignment (state of the last base viewed)..
|
||||
* @return Current state of the alignment.
|
||||
|
|
|
|||
Loading…
Reference in New Issue