Deal with reads whose ends are aligned off the end of a chromosome.
Includes update to ignore non-ATCG bases (not just 'N') (Also, create a BWA dir for future work) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1117 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
65a788f18a
commit
95e2ae0171
|
|
@ -4,7 +4,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.gatk.*;
|
import org.broadinstitute.sting.gatk.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.fasta.*;
|
import org.broadinstitute.sting.utils.fasta.*;
|
||||||
|
|
@ -111,7 +110,6 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
||||||
while (readIter.hasNext() && !done) {
|
while (readIter.hasNext() && !done) {
|
||||||
TraversalStatistics.nRecords++;
|
TraversalStatistics.nRecords++;
|
||||||
|
|
||||||
|
|
||||||
SAMRecord read = readIter.next();
|
SAMRecord read = readIter.next();
|
||||||
reads.add(read);
|
reads.add(read);
|
||||||
if ( read.getAlignmentStart() < leftmostIndex )
|
if ( read.getAlignmentStart() < leftmostIndex )
|
||||||
|
|
@ -134,12 +132,20 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
||||||
|
|
||||||
protected <M, T> T carryWalkerOverInterval(LocusWindowWalker<M, T> walker, T sum, LocusContext window) {
|
protected <M, T> T carryWalkerOverInterval(LocusWindowWalker<M, T> walker, T sum, LocusContext window) {
|
||||||
|
|
||||||
String refBases = new String(sequenceFile.getSubsequenceAt(window.getContig(),window.getLocation().getStart(),window.getLocation().getStop()).getBases());
|
int contigLength = getSAMHeader().getSequence(window.getLocation().getContig()).getSequenceLength();
|
||||||
|
String refSuffix = "";
|
||||||
|
if ( window.getLocation().getStop() > contigLength ) {
|
||||||
|
refSuffix = Utils.dupString('x', (int)window.getLocation().getStop() - contigLength);
|
||||||
|
window.getLocation().setStop(contigLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuffer refBases = new StringBuffer(new String(sequenceFile.getSubsequenceAt(window.getContig(),window.getLocation().getStart(),window.getLocation().getStop()).getBases()));
|
||||||
|
refBases.append(refSuffix);
|
||||||
|
|
||||||
// Iterate forward to get all reference ordered data covering this interval
|
// Iterate forward to get all reference ordered data covering this interval
|
||||||
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus(window.getLocation());
|
final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus(window.getLocation());
|
||||||
|
|
||||||
sum = walkAtinterval( walker, sum, window, refBases, tracker );
|
sum = walkAtinterval( walker, sum, window, refBases.toString(), tracker );
|
||||||
|
|
||||||
printProgress("intervals", window.getLocation());
|
printProgress("intervals", window.getLocation());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -171,8 +171,15 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
for (int readIndex = 0 ; readIndex < readSeq.length() ; refIndex++, readIndex++ ) {
|
for (int readIndex = 0 ; readIndex < readSeq.length() ; refIndex++, readIndex++ ) {
|
||||||
if ( refIndex >= refSeq.length() )
|
if ( refIndex >= refSeq.length() )
|
||||||
sum += MAX_QUAL;
|
sum += MAX_QUAL;
|
||||||
else if ( Character.toUpperCase(readSeq.charAt(readIndex)) != Character.toUpperCase(refSeq.charAt(refIndex)) )
|
else {
|
||||||
sum += (int)quals.charAt(readIndex) - 33;
|
char refChr = refSeq.charAt(refIndex);
|
||||||
|
char readChr = readSeq.charAt(readIndex);
|
||||||
|
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||||
|
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||||
|
continue; // do not count Ns/Xs/etc ?
|
||||||
|
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||||
|
sum += (int)quals.charAt(readIndex) - 33;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
@ -692,9 +699,10 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
||||||
boolean match = true;
|
boolean match = true;
|
||||||
|
|
||||||
for ( int testRefPos = newIndex - period, indelPos = 0 ; testRefPos < newIndex; testRefPos++, indelPos++) {
|
for ( int testRefPos = newIndex - period, indelPos = 0 ; testRefPos < newIndex; testRefPos++, indelPos++) {
|
||||||
if ( Character.toUpperCase(refSeq.charAt(testRefPos)) != indelString.charAt(indelPos) || indelString.charAt(indelPos) == 'N' ) {
|
char indelChr = indelString.charAt(indelPos);
|
||||||
match = false;
|
if ( Character.toUpperCase(refSeq.charAt(testRefPos)) != indelChr || BaseUtils.simpleBaseToBaseIndex(indelChr) == -1 ) {
|
||||||
break;
|
match = false;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( match )
|
if ( match )
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.picard.reference.ReferenceSequence;
|
import net.sf.picard.reference.ReferenceSequence;
|
||||||
import org.broadinstitute.sting.playground.utils.CountedObject;
|
import org.broadinstitute.sting.playground.utils.CountedObject;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -41,9 +41,13 @@ public class AlignmentUtils {
|
||||||
switch( ce.getOperator() ) {
|
switch( ce.getOperator() ) {
|
||||||
case M:
|
case M:
|
||||||
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
|
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
|
||||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) == 'N' ) continue; // do not count N's ?
|
char refChr = (char)ref[i_ref];
|
||||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) !=
|
char readChr = r.getReadString().charAt(i_read);
|
||||||
Character.toUpperCase((char)ref[i_ref]) ) mm_count++;
|
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||||
|
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||||
|
continue; // do not count Ns/Xs/etc ?
|
||||||
|
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||||
|
mm_count++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case I: i_read += ce.getLength(); break;
|
case I: i_read += ce.getLength(); break;
|
||||||
|
|
@ -74,9 +78,13 @@ public class AlignmentUtils {
|
||||||
switch( ce.getOperator() ) {
|
switch( ce.getOperator() ) {
|
||||||
case M:
|
case M:
|
||||||
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
|
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
|
||||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) == 'N' ) continue; // do not count N's ?
|
char refChr = (char)ref[i_ref];
|
||||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) !=
|
char readChr = r.getReadString().charAt(i_read);
|
||||||
Character.toUpperCase(ref[i_ref]) ) mm_count++;
|
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||||
|
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||||
|
continue; // do not count Ns/Xs/etc ?
|
||||||
|
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||||
|
mm_count++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case I: i_read += ce.getLength(); break;
|
case I: i_read += ce.getLength(); break;
|
||||||
|
|
@ -123,7 +131,14 @@ public class AlignmentUtils {
|
||||||
switch ( ce.getOperator() ) {
|
switch ( ce.getOperator() ) {
|
||||||
case M:
|
case M:
|
||||||
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) {
|
for (int j = 0 ; j < ce.getLength() ; j++, refIndex++, readIdx++ ) {
|
||||||
if ( refIndex < refSeq.length() && Character.toUpperCase(readSeq.charAt(readIdx)) != Character.toUpperCase(refSeq.charAt(refIndex)) )
|
if ( refIndex >= refSeq.length() )
|
||||||
|
continue;
|
||||||
|
char refChr = refSeq.charAt(refIndex);
|
||||||
|
char readChr = readSeq.charAt(readIdx);
|
||||||
|
if ( BaseUtils.simpleBaseToBaseIndex(readChr) == -1 ||
|
||||||
|
BaseUtils.simpleBaseToBaseIndex(refChr) == -1 )
|
||||||
|
continue; // do not count Ns/Xs/etc ?
|
||||||
|
if ( Character.toUpperCase(readChr) != Character.toUpperCase(refChr) )
|
||||||
mismatches++;
|
mismatches++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue