Changed reads traversals from providing a LocusContext from which the reference sequence

could be extracted to a char[] containing the reference bases.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@657 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-05-11 22:45:11 +00:00
parent 052819bed5
commit 23e9e29964
21 changed files with 115 additions and 61 deletions

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import edu.mit.broad.picard.reference.ReferenceSequence; import edu.mit.broad.picard.reference.ReferenceSequence;
import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.StringUtil;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -54,6 +55,8 @@ public class ReferenceProvider {
* Gets the bases of the reference that are aligned to the given read. * Gets the bases of the reference that are aligned to the given read.
* @param read the read for which to extract reference information. * @param read the read for which to extract reference information.
* @return The bases corresponding to this read, or null if the read is unmapped. * @return The bases corresponding to this read, or null if the read is unmapped.
* If the alignment goes off the end of the contig, return just the portion
* mapped to the reference.
*/ */
public char[] getReferenceBases( SAMRecord read ) { public char[] getReferenceBases( SAMRecord read ) {
if( read.getReadUnmappedFlag() ) if( read.getReadUnmappedFlag() )
@ -63,6 +66,10 @@ public class ReferenceProvider {
int start = read.getAlignmentStart(); int start = read.getAlignmentStart();
int stop = read.getAlignmentEnd(); int stop = read.getAlignmentEnd();
SAMSequenceRecord sequenceRecord = sequenceFile.getSequenceDictionary().getSequence(contig);
if( stop > sequenceRecord.getSequenceLength() )
stop = sequenceRecord.getSequenceLength();
ReferenceSequence alignmentToReference = sequenceFile.getSubsequenceAt( contig, start, stop ); ReferenceSequence alignmentToReference = sequenceFile.getSubsequenceAt( contig, start, stop );
return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray(); return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray();
} }

View File

@ -17,6 +17,7 @@ import java.util.LinkedList;
import java.io.File; import java.io.File;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -84,11 +85,21 @@ public class TraverseByReads extends TraversalEngine {
final List<SAMRecord> reads = Arrays.asList(read); final List<SAMRecord> reads = Arrays.asList(read);
GenomeLoc loc = new GenomeLoc(read); GenomeLoc loc = new GenomeLoc(read);
char[] refBases = null;
// Jump forward in the reference to this locus location // Jump forward in the reference to this locus location
LocusContext locus = new LocusContext(loc, reads, offsets); LocusContext locus = new LocusContext(loc, reads, offsets);
if (!loc.isUnmapped() && refIter != null) { if (!loc.isUnmapped() && refIter != null) {
final ReferenceIterator refSite = refIter.seekForward(loc); final ReferenceIterator refSite = refIter.seekForward(loc);
locus.setReferenceContig(refSite.getCurrentContig()); locus.setReferenceContig(refSite.getCurrentContig());
// MAQ alignments sometimes go off the end of the reference. Take this
// into account by only returning a shortened version of the reference.
int refStart = read.getAlignmentStart()-1;
int refLength = Math.min( read.getAlignmentEnd() - read.getAlignmentStart() + 1,
refSite.getCurrentContig().length() - read.getAlignmentStart() );
refBases = StringUtil.bytesToString( refSite.getCurrentContig().getBases(),refStart,refLength ).toCharArray();
} }
GenomeLoc.removePastLocs(loc, notYetTraversedLocations); GenomeLoc.removePastLocs(loc, notYetTraversedLocations);
@ -97,9 +108,9 @@ public class TraverseByReads extends TraversalEngine {
// //
// execute the walker contact // execute the walker contact
// //
final boolean keepMeP = walker.filter(locus, read); final boolean keepMeP = walker.filter(refBases, read);
if (keepMeP) { if (keepMeP) {
M x = walker.map(locus, read); M x = walker.map(refBases, read);
sum = walker.reduce(x, sum); sum = walker.reduce(x, sum);
} }

View File

@ -112,9 +112,9 @@ public class TraverseReads extends TraversalEngine {
// update the number of reads we've seen // update the number of reads we've seen
TraversalStatistics.nRecords++; TraversalStatistics.nRecords++;
final boolean keepMeP = readWalker.filter(locus, read); final boolean keepMeP = readWalker.filter(refSeq, read);
if (keepMeP) { if (keepMeP) {
M x = readWalker.map(locus, read); M x = readWalker.map(refSeq, read);
sum = readWalker.reduce(x, sum); sum = readWalker.reduce(x, sum);
} }

View File

@ -4,7 +4,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.LocusContext;
public class CountReadsWalker extends ReadWalker<Integer, Integer> { public class CountReadsWalker extends ReadWalker<Integer, Integer> {
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
//System.out.println(read.format()); //System.out.println(read.format());
return 1; return 1;
} }

View File

@ -4,7 +4,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.LocusContext;
public class PrintReadsWalker extends ReadWalker<Integer, Integer> { public class PrintReadsWalker extends ReadWalker<Integer, Integer> {
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
out.println(read.format()); out.println(read.format());
return 1; return 1;
} }

View File

@ -17,11 +17,11 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
/** Must return true for reads that need to be processed. Reads, for which this method return false will /** Must return true for reads that need to be processed. Reads, for which this method return false will
* be skipped by the engine and never passed to the walker. * be skipped by the engine and never passed to the walker.
*/ */
public boolean filter(LocusContext context, SAMRecord read) { public boolean filter(char[] ref, SAMRecord read) {
// We are keeping all the reads // We are keeping all the reads
return true; return true;
} }
// Map over the org.broadinstitute.sting.gatk.LocusContext // Map over the org.broadinstitute.sting.gatk.LocusContext
public abstract MapType map(LocusContext context, SAMRecord read); public abstract MapType map(char[] ref, SAMRecord read);
} }

View File

@ -22,16 +22,14 @@ public class AlignedReadsHistoWalker extends ReadWalker<Integer, Integer> {
} }
} }
public String walkerType() { return "ByRead"; }
// Do we actually want to operate on the context? // Do we actually want to operate on the context?
public boolean filter(LocusContext context, SAMRecord read) { public boolean filter(char[] ref, SAMRecord read) {
// we only want aligned reads // we only want aligned reads
return !read.getReadUnmappedFlag(); return !read.getReadUnmappedFlag();
} }
// Map over the org.broadinstitute.sting.atk.LocusContext // Map over the org.broadinstitute.sting.atk.LocusContext
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
//System.out.println(read.getAttribute("NM")); //System.out.println(read.getAttribute("NM"));
int editDist = Integer.parseInt(read.getAttribute("NM").toString()); int editDist = Integer.parseInt(read.getAttribute("NM").toString());
if (editDist <= 50) if (editDist <= 50)

View File

@ -12,12 +12,12 @@ public class BaseQualityDumpWalker extends ReadWalker<Integer, Integer> {
protected final int MAX_TARGET_EDIT_DISTANCE = 4; //10; protected final int MAX_TARGET_EDIT_DISTANCE = 4; //10;
// Do we actually want to operate on the context? // Do we actually want to operate on the context?
public boolean filter(LocusContext context, SAMRecord read) { public boolean filter(char[] ref, SAMRecord read) {
// we only want aligned reads // we only want aligned reads
return !read.getReadUnmappedFlag(); return !read.getReadUnmappedFlag();
} }
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
int editDist = Integer.parseInt(read.getAttribute("NM").toString()); int editDist = Integer.parseInt(read.getAttribute("NM").toString());

View File

@ -23,12 +23,12 @@ public class BaseQualityHistoWalker extends ReadWalker<Integer, Integer> {
} }
// Do we actually want to operate on the context? // Do we actually want to operate on the context?
public boolean filter(LocusContext context, SAMRecord read) { public boolean filter(char[] ref, SAMRecord read) {
return true; // We are keeping all the reads return true; // We are keeping all the reads
} }
// Map over the org.broadinstitute.sting.gatk.LocusContext // Map over the org.broadinstitute.sting.gatk.LocusContext
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
for ( byte qual : read.getBaseQualities() ) { for ( byte qual : read.getBaseQualities() ) {
//System.out.println(qual); //System.out.println(qual);
this.qualCounts[qual]++; this.qualCounts[qual]++;

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
public class DisplayFourBaseReadWalker extends ReadWalker<Integer, Integer> { public class DisplayFourBaseReadWalker extends ReadWalker<Integer, Integer> {
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
String bases = read.getReadString(); String bases = read.getReadString();
boolean displayed = false; boolean displayed = false;

View File

@ -39,7 +39,7 @@ public class IOCrusherWalker extends ReadWalker<SAMRecord, ArrayList<SAMFileWrit
/** /**
* *
*/ */
public SAMRecord map(LocusContext context, SAMRecord read) { public SAMRecord map(char[] ref, SAMRecord read) {
nReadsRead++; nReadsRead++;
return read; return read;
} }

View File

@ -75,7 +75,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
} }
} }
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
total_reads++; total_reads++;
@ -84,7 +84,8 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
ptWriter.receive(read); // make sure we still write the read to the output, we do not want to lose data! ptWriter.receive(read); // make sure we still write the read to the output, we do not want to lose data!
return 0; return 0;
} }
/* 11 May 2009 - Commented out because we no longer have the full reference sequence for sanity checking.
ReferenceSequence contig_seq = context.getReferenceContig(); ReferenceSequence contig_seq = context.getReferenceContig();
if ( read.getReferenceName() != cur_contig) { if ( read.getReferenceName() != cur_contig) {
cur_contig = read.getReferenceName(); cur_contig = read.getReferenceName();
@ -94,6 +95,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
pileBuilder.setReferenceSequence(refstr); pileBuilder.setReferenceSequence(refstr);
out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString()); out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString());
} }
*/
// if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now // if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now
@ -138,10 +140,10 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
*/ */
if ( ERR_MODE != null ) { if ( ERR_MODE != null ) {
if ( ERR_MODE.equals("MM")) err = numMismatches(read,contig_seq); if ( ERR_MODE.equals("MM")) err = numMismatches(read,ref);
else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,contig_seq); else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,ref);
else if ( ERR_MODE.equals("ERR")) err = numErrors(read,contig_seq); else if ( ERR_MODE.equals("ERR")) err = numErrors(read,ref);
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,contig_seq); else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,ref);
if ( err > MAX_ERRS ) { if ( err > MAX_ERRS ) {
ptWriter.receive(read); ptWriter.receive(read);
discarded_maxerr_count++; discarded_maxerr_count++;
@ -159,10 +161,10 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
* @return number of errors (number of mismatches plus total length of all insertions/deletions * @return number of errors (number of mismatches plus total length of all insertions/deletions
* @throws RuntimeException * @throws RuntimeException
*/ */
private static int numErrors(SAMRecord r, ReferenceSequence refseq) throws RuntimeException { private static int numErrors(SAMRecord r, char[] ref) throws RuntimeException {
// NM currently stores the total number of mismatches in all blocks + 1 // NM currently stores the total number of mismatches in all blocks + 1
int errs = numMismatches(r,refseq); int errs = numMismatches(r,ref);
// now we have to add the total length of all indels: // now we have to add the total length of all indels:
Cigar c = r.getCigar(); Cigar c = r.getCigar();
@ -187,10 +189,10 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
* deletion will be counted as a single error regardless of the length) * deletion will be counted as a single error regardless of the length)
* @throws RuntimeException * @throws RuntimeException
*/ */
private static int numMismatchesGaps(SAMRecord r,ReferenceSequence refseq) throws RuntimeException { private static int numMismatchesGaps(SAMRecord r,char[] ref) throws RuntimeException {
// NM currently stores the total number of mismatches in all blocks + 1 // NM currently stores the total number of mismatches in all blocks + 1
int errs = numMismatches(r,refseq); int errs = numMismatches(r,ref);
// now we have to add the total length of all indels: // now we have to add the total length of all indels:
Cigar c = r.getCigar(); Cigar c = r.getCigar();
@ -209,7 +211,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
} }
/** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */ /** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */
private static int numMismatches(SAMRecord r, ReferenceSequence refseq) throws RuntimeException { private static int numMismatches(SAMRecord r, char[] refseq) throws RuntimeException {
// NM currently stores the total number of mismatches in all blocks + 1 // NM currently stores the total number of mismatches in all blocks + 1
Integer i = (Integer)r.getAttribute("NM"); Integer i = (Integer)r.getAttribute("NM");

View File

@ -24,18 +24,21 @@ public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval
public void initialize() {} public void initialize() {}
public boolean filter(LocusContext context, SAMRecord read) { public boolean filter(char[] ref, SAMRecord read) {
return ( !read.getReadUnmappedFlag() && // mapped return ( !read.getReadUnmappedFlag() && // mapped
read.getReadLength() <= maxReadLength && // not too big read.getReadLength() <= maxReadLength && // not too big
read.getAlignmentBlocks().size() > 1); // indel read.getAlignmentBlocks().size() > 1); // indel
} }
public Interval map(LocusContext context, SAMRecord read) { public Interval map(char[] ref, SAMRecord read) {
List<AlignmentBlock> blocks = read.getAlignmentBlocks(); List<AlignmentBlock> blocks = read.getAlignmentBlocks();
long indelLeftEdge = read.getAlignmentStart() + blocks.get(0).getLength() - 1; long indelLeftEdge = read.getAlignmentStart() + blocks.get(0).getLength() - 1;
long indelRightEdge = read.getAlignmentEnd() - blocks.get(blocks.size()-1).getLength() + 1; long indelRightEdge = read.getAlignmentEnd() - blocks.get(blocks.size()-1).getLength() + 1;
GenomeLoc indelLoc = new GenomeLoc(context.getLocation().getContigIndex(), indelLeftEdge, indelRightEdge);
return new Interval(context.getLocation(), indelLoc); GenomeLoc indelLoc = new GenomeLoc(read.getReferenceIndex(), indelLeftEdge, indelRightEdge);
GenomeLoc refLoc = new GenomeLoc(read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
return new Interval(refLoc, indelLoc);
} }
public Interval reduceInit() { public Interval reduceInit() {

View File

@ -90,7 +90,7 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
} }
public SAMRecord map(LocusContext context, SAMRecord read) { public SAMRecord map(char[] ref, SAMRecord read) {
SAMRecord recalRead = read; SAMRecord recalRead = read;
byte[] bases = read.getReadBases(); byte[] bases = read.getReadBases();
byte[] quals = read.getBaseQualities(); byte[] quals = read.getBaseQualities();

View File

@ -13,20 +13,18 @@ import java.util.List;
@WalkerName("CountMismatches") @WalkerName("CountMismatches")
public class MismatchCounterWalker extends ReadWalker<Integer, Integer> { public class MismatchCounterWalker extends ReadWalker<Integer, Integer> {
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
int nMismatches = 0; int nMismatches = 0;
ReferenceSequence refseq = context.getReferenceContig();
int start = read.getAlignmentStart()-1; int start = read.getAlignmentStart()-1;
int stop = read.getAlignmentEnd(); int stop = read.getAlignmentEnd();
// sometimes BWA outputs screwy reads // sometimes BWA outputs screwy reads
if ( stop >= context.getReferenceContig().getBases().length ) if ( stop - start > ref.length )
return 0; return 0;
if ( read.getAlignmentBlocks().size() == 1 ) { if ( read.getAlignmentBlocks().size() == 1 ) {
// No indels // No indels
List<Byte> refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop); List<Byte> refSeq = Utils.subseq(ref);
List<Byte> readBases = Utils.subseq(read.getReadBases()); List<Byte> readBases = Utils.subseq(read.getReadBases());
assert(refSeq.size() == readBases.size()); assert(refSeq.size() == readBases.size());

View File

@ -19,12 +19,12 @@ public class MismatchHistoWalker extends ReadWalker<Integer, Integer> {
protected final int MAX_TARGET_EDIT_DISTANCE = 10; protected final int MAX_TARGET_EDIT_DISTANCE = 10;
// Do we actually want to operate on the context? // Do we actually want to operate on the context?
public boolean filter(LocusContext context, SAMRecord read) { public boolean filter(char[] ref, SAMRecord read) {
// we only want aligned reads // we only want aligned reads
return !read.getReadUnmappedFlag(); return !read.getReadUnmappedFlag();
} }
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
int editDist = Integer.parseInt(read.getAttribute("NM").toString()); int editDist = Integer.parseInt(read.getAttribute("NM").toString());
@ -33,20 +33,18 @@ public class MismatchHistoWalker extends ReadWalker<Integer, Integer> {
editDist >= MIN_TARGET_EDIT_DISTANCE && editDist >= MIN_TARGET_EDIT_DISTANCE &&
editDist <= MAX_TARGET_EDIT_DISTANCE ) { editDist <= MAX_TARGET_EDIT_DISTANCE ) {
ReferenceSequence refseq = context.getReferenceContig();
int start = read.getAlignmentStart()-1; int start = read.getAlignmentStart()-1;
int stop = read.getAlignmentEnd(); int stop = read.getAlignmentEnd();
// sometimes BWA outputs screwy reads // sometimes BWA outputs screwy reads
if ( stop >= context.getReferenceContig().getBases().length ) if ( stop - start > ref.length )
return 0; return 0;
List<Byte> refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop); List<Byte> refSeq = Utils.subseq(ref);
List<Byte> readBases = Utils.subseq(read.getReadBases()); List<Byte> readBases = Utils.subseq(read.getReadBases());
assert(refSeq.size() == readBases.size()); assert(refSeq.size() == readBases.size());
// it's actually faster to reallocate a resized array than to use ArrayLists... // it's actually faster to reallocate a resized array than to use ArrayLists...
if ( refSeq.size() > mismatchCounts.length ) { if ( ref.length > mismatchCounts.length ) {
int oldLength = mismatchCounts.length; int oldLength = mismatchCounts.length;
mismatchCounts = (long[])resizeArray(mismatchCounts, refSeq.size()); mismatchCounts = (long[])resizeArray(mismatchCounts, refSeq.size());
for ( int i = oldLength; i < refSeq.size(); i++ ) for ( int i = oldLength; i < refSeq.size(); i++ )

View File

@ -27,12 +27,11 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
/** /**
* Ignore reads with indels or clipping * Ignore reads with indels or clipping
* *
* @param context the reference context
* @param read the read to assess * @param read the read to assess
* @return true if the read can be processed, false if it should be ignored * @return true if the read can be processed, false if it should be ignored
*/ */
public boolean filter(LocusContext context, SAMRecord read) { public boolean filter(char[] ref, SAMRecord read) {
return (read.getCigar().numCigarElements() == 1 && read.getAlignmentStart() + read.getReadLength() < context.getReferenceContig().length()); return (read.getCigar().numCigarElements() == 1 && read.getReadLength() > ref.length);
} }
/** /**
@ -41,17 +40,15 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
* of this array is always "true" so that we can figure out how many reads we * of this array is always "true" so that we can figure out how many reads we
* processed in a thread-safe manner. * processed in a thread-safe manner.
* *
* @param context the reference context
* @param read the read to assess * @param read the read to assess
* @return An array of length (read_length + 1) indicating where the mismatches occur. * @return An array of length (read_length + 1) indicating where the mismatches occur.
* Last element is for internal use so the reduce() function can figure out how * Last element is for internal use so the reduce() function can figure out how
* many reads we processed. * many reads we processed.
*/ */
public boolean[] map(LocusContext context, SAMRecord read) { public boolean[] map(char[] ref, SAMRecord read) {
boolean[] errorsPerCycle = new boolean[read.getReadLength() + 1]; boolean[] errorsPerCycle = new boolean[read.getReadLength() + 1];
byte[] bases = read.getReadBases(); byte[] bases = read.getReadBases();
byte[] contig = context.getReferenceContig().getBases();
byte[] sq = (byte[]) read.getAttribute("SQ"); byte[] sq = (byte[]) read.getAttribute("SQ");
if (printVisualHits) { if (printVisualHits) {
@ -61,16 +58,16 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
} }
System.out.println(); System.out.println();
for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) { for (int cycle = 0; cycle < bases.length; cycle++) {
byte compBase = convertIUPACBaseToSimpleBase(contig[offset]); byte compBase = convertIUPACBaseToSimpleBase((byte)ref[cycle]);
System.out.print((char) compBase); System.out.print((char) compBase);
} }
System.out.println("\n"); System.out.println("\n");
} }
for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) { for (int cycle = 0; cycle < bases.length; cycle++) {
byte compBase = convertIUPACBaseToSimpleBase(contig[offset]); byte compBase = convertIUPACBaseToSimpleBase((byte)ref[cycle]);
if (compBase != '.') { if (compBase != '.') {
if (useNextBestBase || useNextRandomBase) { if (useNextBestBase || useNextRandomBase) {

View File

@ -23,12 +23,12 @@ public class ReadFilterWalker extends ReadWalker<Integer,Integer> {
} }
@Override @Override
public boolean filter(LocusContext context, SAMRecord read) { public boolean filter(char[] ref, SAMRecord read) {
return read.getReadLength() <= max_len; return read.getReadLength() <= max_len;
} }
@Override @Override
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
writer.addAlignment(read); writer.addAlignment(read);
return 1; return 1;
} }

View File

@ -19,7 +19,7 @@ public class SAMToFastqAndSqWalker extends ReadWalker<Integer, Integer> {
private boolean ready = false; private boolean ready = false;
public Integer map(LocusContext context, SAMRecord read) { public Integer map(char[] ref, SAMRecord read) {
if (!ready) { if (!ready) {
try { try {
fastqbw = new PrintWriter(FASTQFILE); fastqbw = new PrintWriter(FASTQFILE);

View File

@ -50,6 +50,39 @@ public class AlignmentUtils {
return mm_count; return mm_count;
} }
/**
* mhanna - 11 May 2009 - stubbed out competing method that works with partial references.
* Computes number of mismatches in the read alignment to the refence <code>ref</code>
* specified in the record <code>r</code>. Indels are completely <i>ignored</i> by this method:
* only base mismatches in the alignment segments where both sequences are present are counted.
* @param r
* @return
*/
public static int numMismatches(SAMRecord r, char[] ref) {
if ( r.getReadUnmappedFlag() ) return 1000000;
int i_ref = 0; // position on the ref
int i_read = 0; // position on the read
int mm_count = 0; // number of mismatches
Cigar c = r.getCigar();
for ( int k = 0 ; k < c.numCigarElements() ; k++ ) {
CigarElement ce = c.getCigarElement(k);
switch( ce.getOperator() ) {
case M:
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) == 'N' ) continue; // do not count N's ?
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) !=
Character.toUpperCase(ref[i_ref]) ) mm_count++;
}
break;
case I: i_read += ce.getLength(); break;
case D: i_ref += ce.getLength(); break;
default: throw new RuntimeException("Unrecognized cigar element");
}
}
return mm_count;
}
// IMPORTANT NOTE: ALTHOUGH THIS METHOD IS EXTREMELY SIMILAR TO THE ONE ABOVE, WE NEED // IMPORTANT NOTE: ALTHOUGH THIS METHOD IS EXTREMELY SIMILAR TO THE ONE ABOVE, WE NEED
// TWO SEPARATE IMPLEMENTATIONS IN ORDER TO PREVENT JAVA STRINGS FROM FORCING US TO // TWO SEPARATE IMPLEMENTATIONS IN ORDER TO PREVENT JAVA STRINGS FROM FORCING US TO
// PERFORM EXPENSIVE ARRAY COPYING WHEN TRYING TO GET A BYTE ARRAY... // PERFORM EXPENSIVE ARRAY COPYING WHEN TRYING TO GET A BYTE ARRAY...

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.StringUtil;
import edu.mit.broad.picard.reference.ReferenceSequenceFile; import edu.mit.broad.picard.reference.ReferenceSequenceFile;
import java.util.*; import java.util.*;
@ -119,14 +120,20 @@ public class Utils {
return c; return c;
} }
public static ArrayList<Byte> subseq(char[] fullArray) {
byte[] fullByteArray = new byte[fullArray.length];
StringUtil.charsToBytes(fullArray,0,fullArray.length,fullByteArray,0);
return subseq(fullByteArray);
}
public static ArrayList<Byte> subseq(byte[] fullArray) { public static ArrayList<Byte> subseq(byte[] fullArray) {
return subseq(fullArray, 0, fullArray.length); return subseq(fullArray, 0, fullArray.length-1);
} }
public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) { public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) {
assert end < fullArray.length; assert end < fullArray.length;
ArrayList<Byte> dest = new ArrayList<Byte>(end - start + 1); ArrayList<Byte> dest = new ArrayList<Byte>(end - start + 1);
for (int i = start; i < end; i++) { for (int i = start; i <= end; i++) {
dest.add(fullArray[i]); dest.add(fullArray[i]);
} }
return dest; return dest;