diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceProvider.java b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceProvider.java index 350bc9d2c..35ead8fd1 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceProvider.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/providers/ReferenceProvider.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard; import edu.mit.broad.picard.reference.ReferenceSequence; import net.sf.samtools.util.StringUtil; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMSequenceRecord; /** * Created by IntelliJ IDEA. @@ -54,6 +55,8 @@ public class ReferenceProvider { * Gets the bases of the reference that are aligned to the given read. * @param read the read for which to extract reference information. * @return The bases corresponding to this read, or null if the read is unmapped. + * If the alignment goes off the end of the contig, return just the portion + * mapped to the reference. */ public char[] getReferenceBases( SAMRecord read ) { if( read.getReadUnmappedFlag() ) @@ -63,6 +66,10 @@ public class ReferenceProvider { int start = read.getAlignmentStart(); int stop = read.getAlignmentEnd(); + SAMSequenceRecord sequenceRecord = sequenceFile.getSequenceDictionary().getSequence(contig); + if( stop > sequenceRecord.getSequenceLength() ) + stop = sequenceRecord.getSequenceLength(); + ReferenceSequence alignmentToReference = sequenceFile.getSubsequenceAt( contig, start, stop ); return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray(); } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReads.java index 839b06736..876507ba2 100644 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseByReads.java @@ -17,6 +17,7 @@ import java.util.LinkedList; import java.io.File; import net.sf.samtools.SAMRecord; +import net.sf.samtools.util.StringUtil; /** * Created by IntelliJ IDEA. @@ -84,11 +85,21 @@ public class TraverseByReads extends TraversalEngine { final List reads = Arrays.asList(read); GenomeLoc loc = new GenomeLoc(read); + char[] refBases = null; + // Jump forward in the reference to this locus location LocusContext locus = new LocusContext(loc, reads, offsets); if (!loc.isUnmapped() && refIter != null) { final ReferenceIterator refSite = refIter.seekForward(loc); locus.setReferenceContig(refSite.getCurrentContig()); + + // MAQ alignments sometimes go off the end of the reference. Take this + // into account by only returning a shortened version of the reference. + int refStart = read.getAlignmentStart()-1; + int refLength = Math.min( read.getAlignmentEnd() - read.getAlignmentStart() + 1, + refSite.getCurrentContig().length() - read.getAlignmentStart() ); + + refBases = StringUtil.bytesToString( refSite.getCurrentContig().getBases(),refStart,refLength ).toCharArray(); } GenomeLoc.removePastLocs(loc, notYetTraversedLocations); @@ -97,9 +108,9 @@ public class TraverseByReads extends TraversalEngine { // // execute the walker contact // - final boolean keepMeP = walker.filter(locus, read); + final boolean keepMeP = walker.filter(refBases, read); if (keepMeP) { - M x = walker.map(locus, read); + M x = walker.map(refBases, read); sum = walker.reduce(x, sum); } diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 13cabb476..c0f895f09 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -112,9 +112,9 @@ public class TraverseReads extends TraversalEngine { // update the number of reads we've seen TraversalStatistics.nRecords++; - final boolean keepMeP = readWalker.filter(locus, read); + final boolean keepMeP = readWalker.filter(refSeq, read); if (keepMeP) { - M x = readWalker.map(locus, read); + M x = readWalker.map(refSeq, read); sum = readWalker.reduce(x, sum); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java index c88d25dcf..291a929f1 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/CountReadsWalker.java @@ -4,7 +4,7 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.LocusContext; public class CountReadsWalker extends ReadWalker { - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { //System.out.println(read.format()); return 1; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 769a2a63e..698a310b8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -4,7 +4,7 @@ import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.LocusContext; public class PrintReadsWalker extends ReadWalker { - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { out.println(read.format()); return 1; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java index 8d6443a56..210cccb0a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -17,11 +17,11 @@ public abstract class ReadWalker extends Walker { } } - public String walkerType() { return "ByRead"; } - // Do we actually want to operate on the context? - public boolean filter(LocusContext context, SAMRecord read) { + public boolean filter(char[] ref, SAMRecord read) { // we only want aligned reads return !read.getReadUnmappedFlag(); } // Map over the org.broadinstitute.sting.atk.LocusContext - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { //System.out.println(read.getAttribute("NM")); int editDist = Integer.parseInt(read.getAttribute("NM").toString()); if (editDist <= 50) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseQualityDumpWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseQualityDumpWalker.java index 1ad6cb7fa..cbff5d43d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseQualityDumpWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseQualityDumpWalker.java @@ -12,12 +12,12 @@ public class BaseQualityDumpWalker extends ReadWalker { protected final int MAX_TARGET_EDIT_DISTANCE = 4; //10; // Do we actually want to operate on the context? - public boolean filter(LocusContext context, SAMRecord read) { + public boolean filter(char[] ref, SAMRecord read) { // we only want aligned reads return !read.getReadUnmappedFlag(); } - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { int editDist = Integer.parseInt(read.getAttribute("NM").toString()); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseQualityHistoWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseQualityHistoWalker.java index f0767bbdb..370d3ffa4 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseQualityHistoWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseQualityHistoWalker.java @@ -23,12 +23,12 @@ public class BaseQualityHistoWalker extends ReadWalker { } // Do we actually want to operate on the context? - public boolean filter(LocusContext context, SAMRecord read) { + public boolean filter(char[] ref, SAMRecord read) { return true; // We are keeping all the reads } // Map over the org.broadinstitute.sting.gatk.LocusContext - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { for ( byte qual : read.getBaseQualities() ) { //System.out.println(qual); this.qualCounts[qual]++; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DisplayFourBaseReadWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DisplayFourBaseReadWalker.java index e32fc2342..a5c99e2c0 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DisplayFourBaseReadWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DisplayFourBaseReadWalker.java @@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.QualityUtils; import net.sf.samtools.SAMRecord; public class DisplayFourBaseReadWalker extends ReadWalker { - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { String bases = read.getReadString(); boolean displayed = false; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java index bae99b408..a14facd4a 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IOCrusherWalker.java @@ -39,7 +39,7 @@ public class IOCrusherWalker extends ReadWalker { } } - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { total_reads++; @@ -84,7 +84,8 @@ public class IndelInspector extends ReadWalker { ptWriter.receive(read); // make sure we still write the read to the output, we do not want to lose data! return 0; } - + + /* 11 May 2009 - Commented out because we no longer have the full reference sequence for sanity checking. ReferenceSequence contig_seq = context.getReferenceContig(); if ( read.getReferenceName() != cur_contig) { cur_contig = read.getReferenceName(); @@ -94,6 +95,7 @@ public class IndelInspector extends ReadWalker { pileBuilder.setReferenceSequence(refstr); out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString()); } + */ // if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now @@ -138,10 +140,10 @@ public class IndelInspector extends ReadWalker { */ if ( ERR_MODE != null ) { - if ( ERR_MODE.equals("MM")) err = numMismatches(read,contig_seq); - else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,contig_seq); - else if ( ERR_MODE.equals("ERR")) err = numErrors(read,contig_seq); - else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,contig_seq); + if ( ERR_MODE.equals("MM")) err = numMismatches(read,ref); + else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,ref); + else if ( ERR_MODE.equals("ERR")) err = numErrors(read,ref); + else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,ref); if ( err > MAX_ERRS ) { ptWriter.receive(read); discarded_maxerr_count++; @@ -159,10 +161,10 @@ public class IndelInspector extends ReadWalker { * @return number of errors (number of mismatches plus total length of all insertions/deletions * @throws RuntimeException */ - private static int numErrors(SAMRecord r, ReferenceSequence refseq) throws RuntimeException { + private static int numErrors(SAMRecord r, char[] ref) throws RuntimeException { // NM currently stores the total number of mismatches in all blocks + 1 - int errs = numMismatches(r,refseq); + int errs = numMismatches(r,ref); // now we have to add the total length of all indels: Cigar c = r.getCigar(); @@ -187,10 +189,10 @@ public class IndelInspector extends ReadWalker { * deletion will be counted as a single error regardless of the length) * @throws RuntimeException */ - private static int numMismatchesGaps(SAMRecord r,ReferenceSequence refseq) throws RuntimeException { + private static int numMismatchesGaps(SAMRecord r,char[] ref) throws RuntimeException { // NM currently stores the total number of mismatches in all blocks + 1 - int errs = numMismatches(r,refseq); + int errs = numMismatches(r,ref); // now we have to add the total length of all indels: Cigar c = r.getCigar(); @@ -209,7 +211,7 @@ public class IndelInspector extends ReadWalker { } /** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */ - private static int numMismatches(SAMRecord r, ReferenceSequence refseq) throws RuntimeException { + private static int numMismatches(SAMRecord r, char[] refseq) throws RuntimeException { // NM currently stores the total number of mismatches in all blocks + 1 Integer i = (Integer)r.getAttribute("NM"); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelIntervalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelIntervalWalker.java index 967e12b8c..4b7b678e4 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelIntervalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/IndelIntervalWalker.java @@ -24,18 +24,21 @@ public class IndelIntervalWalker extends ReadWalker 1); // indel } - public Interval map(LocusContext context, SAMRecord read) { + public Interval map(char[] ref, SAMRecord read) { List blocks = read.getAlignmentBlocks(); long indelLeftEdge = read.getAlignmentStart() + blocks.get(0).getLength() - 1; long indelRightEdge = read.getAlignmentEnd() - blocks.get(blocks.size()-1).getLength() + 1; - GenomeLoc indelLoc = new GenomeLoc(context.getLocation().getContigIndex(), indelLeftEdge, indelRightEdge); - return new Interval(context.getLocation(), indelLoc); + + GenomeLoc indelLoc = new GenomeLoc(read.getReferenceIndex(), indelLeftEdge, indelRightEdge); + GenomeLoc refLoc = new GenomeLoc(read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()); + + return new Interval(refLoc, indelLoc); } public Interval reduceInit() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LogisticRecalibrationWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LogisticRecalibrationWalker.java index 516b6d8d0..6fa9dba48 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/LogisticRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/LogisticRecalibrationWalker.java @@ -90,7 +90,7 @@ public class LogisticRecalibrationWalker extends ReadWalker { - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { int nMismatches = 0; - ReferenceSequence refseq = context.getReferenceContig(); - int start = read.getAlignmentStart()-1; int stop = read.getAlignmentEnd(); // sometimes BWA outputs screwy reads - if ( stop >= context.getReferenceContig().getBases().length ) + if ( stop - start > ref.length ) return 0; if ( read.getAlignmentBlocks().size() == 1 ) { // No indels - List refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop); + List refSeq = Utils.subseq(ref); List readBases = Utils.subseq(read.getReadBases()); assert(refSeq.size() == readBases.size()); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MismatchHistoWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MismatchHistoWalker.java index bf6c3ce1f..8874a0f36 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/MismatchHistoWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/MismatchHistoWalker.java @@ -19,12 +19,12 @@ public class MismatchHistoWalker extends ReadWalker { protected final int MAX_TARGET_EDIT_DISTANCE = 10; // Do we actually want to operate on the context? - public boolean filter(LocusContext context, SAMRecord read) { + public boolean filter(char[] ref, SAMRecord read) { // we only want aligned reads return !read.getReadUnmappedFlag(); } - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { int editDist = Integer.parseInt(read.getAttribute("NM").toString()); @@ -33,20 +33,18 @@ public class MismatchHistoWalker extends ReadWalker { editDist >= MIN_TARGET_EDIT_DISTANCE && editDist <= MAX_TARGET_EDIT_DISTANCE ) { - ReferenceSequence refseq = context.getReferenceContig(); - int start = read.getAlignmentStart()-1; int stop = read.getAlignmentEnd(); // sometimes BWA outputs screwy reads - if ( stop >= context.getReferenceContig().getBases().length ) + if ( stop - start > ref.length ) return 0; - List refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop); + List refSeq = Utils.subseq(ref); List readBases = Utils.subseq(read.getReadBases()); assert(refSeq.size() == readBases.size()); // it's actually faster to reallocate a resized array than to use ArrayLists... - if ( refSeq.size() > mismatchCounts.length ) { + if ( ref.length > mismatchCounts.length ) { int oldLength = mismatchCounts.length; mismatchCounts = (long[])resizeArray(mismatchCounts, refSeq.size()); for ( int i = oldLength; i < refSeq.size(); i++ ) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadErrorRateWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadErrorRateWalker.java index b496768a2..128dde640 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadErrorRateWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadErrorRateWalker.java @@ -27,12 +27,11 @@ public class ReadErrorRateWalker extends ReadWalker { /** * Ignore reads with indels or clipping * - * @param context the reference context * @param read the read to assess * @return true if the read can be processed, false if it should be ignored */ - public boolean filter(LocusContext context, SAMRecord read) { - return (read.getCigar().numCigarElements() == 1 && read.getAlignmentStart() + read.getReadLength() < context.getReferenceContig().length()); + public boolean filter(char[] ref, SAMRecord read) { + return (read.getCigar().numCigarElements() == 1 && read.getReadLength() > ref.length); } /** @@ -41,17 +40,15 @@ public class ReadErrorRateWalker extends ReadWalker { * of this array is always "true" so that we can figure out how many reads we * processed in a thread-safe manner. * - * @param context the reference context * @param read the read to assess * @return An array of length (read_length + 1) indicating where the mismatches occur. * Last element is for internal use so the reduce() function can figure out how * many reads we processed. */ - public boolean[] map(LocusContext context, SAMRecord read) { + public boolean[] map(char[] ref, SAMRecord read) { boolean[] errorsPerCycle = new boolean[read.getReadLength() + 1]; byte[] bases = read.getReadBases(); - byte[] contig = context.getReferenceContig().getBases(); byte[] sq = (byte[]) read.getAttribute("SQ"); if (printVisualHits) { @@ -61,16 +58,16 @@ public class ReadErrorRateWalker extends ReadWalker { } System.out.println(); - for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) { - byte compBase = convertIUPACBaseToSimpleBase(contig[offset]); + for (int cycle = 0; cycle < bases.length; cycle++) { + byte compBase = convertIUPACBaseToSimpleBase((byte)ref[cycle]); System.out.print((char) compBase); } System.out.println("\n"); } - for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) { - byte compBase = convertIUPACBaseToSimpleBase(contig[offset]); + for (int cycle = 0; cycle < bases.length; cycle++) { + byte compBase = convertIUPACBaseToSimpleBase((byte)ref[cycle]); if (compBase != '.') { if (useNextBestBase || useNextRandomBase) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadFilterWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadFilterWalker.java index 22f6cce93..4cde166e1 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadFilterWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ReadFilterWalker.java @@ -23,12 +23,12 @@ public class ReadFilterWalker extends ReadWalker { } @Override - public boolean filter(LocusContext context, SAMRecord read) { + public boolean filter(char[] ref, SAMRecord read) { return read.getReadLength() <= max_len; } @Override - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { writer.addAlignment(read); return 1; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SAMToFastqAndSqWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SAMToFastqAndSqWalker.java index 963fbd1f1..af4f804aa 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/SAMToFastqAndSqWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/SAMToFastqAndSqWalker.java @@ -19,7 +19,7 @@ public class SAMToFastqAndSqWalker extends ReadWalker { private boolean ready = false; - public Integer map(LocusContext context, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read) { if (!ready) { try { fastqbw = new PrintWriter(FASTQFILE); diff --git a/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java b/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java index 5db2e63cd..6c30e5f41 100644 --- a/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java +++ b/java/src/org/broadinstitute/sting/playground/indels/AlignmentUtils.java @@ -50,6 +50,39 @@ public class AlignmentUtils { return mm_count; } + /** + * mhanna - 11 May 2009 - stubbed out competing method that works with partial references. + * Computes number of mismatches in the read alignment to the refence ref + * specified in the record r. Indels are completely ignored by this method: + * only base mismatches in the alignment segments where both sequences are present are counted. + * @param r + * @return + */ + public static int numMismatches(SAMRecord r, char[] ref) { + if ( r.getReadUnmappedFlag() ) return 1000000; + int i_ref = 0; // position on the ref + int i_read = 0; // position on the read + int mm_count = 0; // number of mismatches + Cigar c = r.getCigar(); + for ( int k = 0 ; k < c.numCigarElements() ; k++ ) { + CigarElement ce = c.getCigarElement(k); + switch( ce.getOperator() ) { + case M: + for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) { + if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) == 'N' ) continue; // do not count N's ? + if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) != + Character.toUpperCase(ref[i_ref]) ) mm_count++; + } + break; + case I: i_read += ce.getLength(); break; + case D: i_ref += ce.getLength(); break; + default: throw new RuntimeException("Unrecognized cigar element"); + } + + } + return mm_count; + } + // IMPORTANT NOTE: ALTHOUGH THIS METHOD IS EXTREMELY SIMILAR TO THE ONE ABOVE, WE NEED // TWO SEPARATE IMPLEMENTATIONS IN ORDER TO PREVENT JAVA STRINGS FROM FORCING US TO // PERFORM EXPENSIVE ARRAY COPYING WHEN TRYING TO GET A BYTE ARRAY... diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java index 86d16b158..29ada1b24 100755 --- a/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/java/src/org/broadinstitute/sting/utils/Utils.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceDictionary; +import net.sf.samtools.util.StringUtil; import edu.mit.broad.picard.reference.ReferenceSequenceFile; import java.util.*; @@ -119,14 +120,20 @@ public class Utils { return c; } + public static ArrayList subseq(char[] fullArray) { + byte[] fullByteArray = new byte[fullArray.length]; + StringUtil.charsToBytes(fullArray,0,fullArray.length,fullByteArray,0); + return subseq(fullByteArray); + } + public static ArrayList subseq(byte[] fullArray) { - return subseq(fullArray, 0, fullArray.length); + return subseq(fullArray, 0, fullArray.length-1); } public static ArrayList subseq(byte[] fullArray, int start, int end) { assert end < fullArray.length; ArrayList dest = new ArrayList(end - start + 1); - for (int i = start; i < end; i++) { + for (int i = start; i <= end; i++) { dest.add(fullArray[i]); } return dest;