Deal with reads whose ends are aligned off the end of a chromosome.
Includes update to ignore non-ATCG bases (not just 'N') (Also, create a BWA dir for future work) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1117 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
65a788f18a
commit
95e2ae0171
|
|
@ -4,7 +4,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.fasta.*;
|
||||
|
|
@ -111,7 +110,6 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
while (readIter.hasNext() && !done) {
|
||||
TraversalStatistics.nRecords++;
|
||||
|
||||
|
||||
SAMRecord read = readIter.next();
|
||||
reads.add(read);
|
||||
if ( read.getAlignmentStart() < leftmostIndex )
|
||||
|
|
@ -134,12 +132,20 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
|
||||
protected <M, T> T carryWalkerOverInterval(LocusWindowWalker<M, T> walker, T sum, LocusContext window) {
|
||||
|
||||
String refBases = new String(sequenceFile.getSubsequenceAt(window.getContig(),window.getLocation().getStart(),window.getLocation().getStop()).getBases());
|
||||
int contigLength = getSAMHeader().getSequence(window.getLocation().getContig()).getSequenceLength();
|
||||
String refSuffix = "";
|
||||
if ( window.getLocation().getStop() > contigLength ) {
|
||||
refSuffix = Utils.dupString('x', (int)window.getLocation().getStop() - contigLength);
|
||||
window.getLocation().setStop(contigLength);
|
||||
}
|
||||
|
||||
StringBuffer refBases = new StringBuffer(new String(sequenceFile.getSubsequenceAt(window.getContig(),window.getLocation().getStart(),window.getLocation().getStop()).getBases()));
|
||||
refBases.append(refSuffix);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this interval
|
||||
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus(window.getLocation());
|
||||
|
||||
sum = walkAtinterval( walker, sum, window, refBases, tracker );
|
||||
sum = walkAtinterval( walker, sum, window, refBases.toString(), tracker );
|
||||
|
||||
printProgress("intervals", window.getLocation());
|
||||
|
||||
|
|
|
|||
|
|
@ -171,8 +171,15 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
for (int readIndex = 0 ; readIndex < readSeq.length() ; refIndex++, readIndex++ ) {
|
||||
if ( refIndex >= refSeq.length() )
|
||||
sum += MAX_QUAL;
|
||||
else if ( Character.toUpperCase(readSeq.charAt(readIndex)) != Character.toUpperCase(refSeq.charAt(refIndex)) )
|
||||
sum += (int)quals.charAt(readIndex) - 33;
|
||||
else {
|
||||
char refChr = refSeq.charAt(refIndex);
|
||||
char readChr = readSeq.charAt(readIndex);
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||
continue; // do not count Ns/Xs/etc ?
|
||||
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||
sum += (int)quals.charAt(readIndex) - 33;
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
|
@ -692,9 +699,10 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
boolean match = true;
|
||||
|
||||
for ( int testRefPos = newIndex - period, indelPos = 0 ; testRefPos < newIndex; testRefPos++, indelPos++) {
|
||||
if ( Character.toUpperCase(refSeq.charAt(testRefPos)) != indelString.charAt(indelPos) || indelString.charAt(indelPos) == 'N' ) {
|
||||
match = false;
|
||||
break;
|
||||
char indelChr = indelString.charAt(indelPos);
|
||||
if ( Character.toUpperCase(refSeq.charAt(testRefPos)) != indelChr || BaseUtils.simpleBaseToBaseIndex(indelChr) == -1 ) {
|
||||
match = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( match )
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import net.sf.samtools.Cigar;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import org.broadinstitute.sting.playground.utils.CountedObject;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -41,9 +41,13 @@ public class AlignmentUtils {
|
|||
switch( ce.getOperator() ) {
|
||||
case M:
|
||||
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) == 'N' ) continue; // do not count N's ?
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) !=
|
||||
Character.toUpperCase((char)ref[i_ref]) ) mm_count++;
|
||||
char refChr = (char)ref[i_ref];
|
||||
char readChr = r.getReadString().charAt(i_read);
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||
continue; // do not count Ns/Xs/etc ?
|
||||
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||
mm_count++;
|
||||
}
|
||||
break;
|
||||
case I: i_read += ce.getLength(); break;
|
||||
|
|
@ -74,9 +78,13 @@ public class AlignmentUtils {
|
|||
switch( ce.getOperator() ) {
|
||||
case M:
|
||||
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) == 'N' ) continue; // do not count N's ?
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) !=
|
||||
Character.toUpperCase(ref[i_ref]) ) mm_count++;
|
||||
char refChr = (char)ref[i_ref];
|
||||
char readChr = r.getReadString().charAt(i_read);
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||
continue; // do not count Ns/Xs/etc ?
|
||||
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||
mm_count++;
|
||||
}
|
||||
break;
|
||||
case I: i_read += ce.getLength(); break;
|
||||
|
|
@ -123,7 +131,14 @@ public class AlignmentUtils {
|
|||
switch ( ce.getOperator() ) {
|
||||
case M:
|
||||
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) {
|
||||
if ( refIndex < refSeq.length() && Character.toUpperCase(readSeq.charAt(readIdx)) != Character.toUpperCase(refSeq.charAt(refIndex)) )
|
||||
if ( refIndex >= refSeq.length() )
|
||||
continue;
|
||||
char refChr = refSeq.charAt(refIndex);
|
||||
char readChr = readSeq.charAt(readIdx);
|
||||
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||
continue; // do not count Ns/Xs/etc ?
|
||||
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||
mismatches++;
|
||||
}
|
||||
break;
|
||||
|
|
|
|||
Loading…
Reference in New Issue