Changed reads traversals from providing a LocusContext from which the reference sequence
could be extracted to a char[] containing the reference bases. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@657 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
052819bed5
commit
23e9e29964
|
|
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
|
|||
import edu.mit.broad.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -54,6 +55,8 @@ public class ReferenceProvider {
|
|||
* Gets the bases of the reference that are aligned to the given read.
|
||||
* @param read the read for which to extract reference information.
|
||||
* @return The bases corresponding to this read, or null if the read is unmapped.
|
||||
* If the alignment goes off the end of the contig, return just the portion
|
||||
* mapped to the reference.
|
||||
*/
|
||||
public char[] getReferenceBases( SAMRecord read ) {
|
||||
if( read.getReadUnmappedFlag() )
|
||||
|
|
@ -63,6 +66,10 @@ public class ReferenceProvider {
|
|||
int start = read.getAlignmentStart();
|
||||
int stop = read.getAlignmentEnd();
|
||||
|
||||
SAMSequenceRecord sequenceRecord = sequenceFile.getSequenceDictionary().getSequence(contig);
|
||||
if( stop > sequenceRecord.getSequenceLength() )
|
||||
stop = sequenceRecord.getSequenceLength();
|
||||
|
||||
ReferenceSequence alignmentToReference = sequenceFile.getSubsequenceAt( contig, start, stop );
|
||||
return StringUtil.bytesToString(alignmentToReference.getBases()).toCharArray();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ import java.util.LinkedList;
|
|||
import java.io.File;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -84,11 +85,21 @@ public class TraverseByReads extends TraversalEngine {
|
|||
final List<SAMRecord> reads = Arrays.asList(read);
|
||||
GenomeLoc loc = new GenomeLoc(read);
|
||||
|
||||
char[] refBases = null;
|
||||
|
||||
// Jump forward in the reference to this locus location
|
||||
LocusContext locus = new LocusContext(loc, reads, offsets);
|
||||
if (!loc.isUnmapped() && refIter != null) {
|
||||
final ReferenceIterator refSite = refIter.seekForward(loc);
|
||||
locus.setReferenceContig(refSite.getCurrentContig());
|
||||
|
||||
// MAQ alignments sometimes go off the end of the reference. Take this
|
||||
// into account by only returning a shortened version of the reference.
|
||||
int refStart = read.getAlignmentStart()-1;
|
||||
int refLength = Math.min( read.getAlignmentEnd() - read.getAlignmentStart() + 1,
|
||||
refSite.getCurrentContig().length() - read.getAlignmentStart() );
|
||||
|
||||
refBases = StringUtil.bytesToString( refSite.getCurrentContig().getBases(),refStart,refLength ).toCharArray();
|
||||
}
|
||||
|
||||
GenomeLoc.removePastLocs(loc, notYetTraversedLocations);
|
||||
|
|
@ -97,9 +108,9 @@ public class TraverseByReads extends TraversalEngine {
|
|||
//
|
||||
// execute the walker contact
|
||||
//
|
||||
final boolean keepMeP = walker.filter(locus, read);
|
||||
final boolean keepMeP = walker.filter(refBases, read);
|
||||
if (keepMeP) {
|
||||
M x = walker.map(locus, read);
|
||||
M x = walker.map(refBases, read);
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -112,9 +112,9 @@ public class TraverseReads extends TraversalEngine {
|
|||
// update the number of reads we've seen
|
||||
TraversalStatistics.nRecords++;
|
||||
|
||||
final boolean keepMeP = readWalker.filter(locus, read);
|
||||
final boolean keepMeP = readWalker.filter(refSeq, read);
|
||||
if (keepMeP) {
|
||||
M x = readWalker.map(locus, read);
|
||||
M x = readWalker.map(refSeq, read);
|
||||
sum = readWalker.reduce(x, sum);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
|
||||
public class CountReadsWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
//System.out.println(read.format());
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
|
||||
public class PrintReadsWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
out.println(read.format());
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,11 +17,11 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
|
|||
/** Must return true for reads that need to be processed. Reads, for which this method return false will
|
||||
* be skipped by the engine and never passed to the walker.
|
||||
*/
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
// We are keeping all the reads
|
||||
return true;
|
||||
}
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
||||
public abstract MapType map(LocusContext context, SAMRecord read);
|
||||
public abstract MapType map(char[] ref, SAMRecord read);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,16 +22,14 @@ public class AlignedReadsHistoWalker extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
public String walkerType() { return "ByRead"; }
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
// we only want aligned reads
|
||||
return !read.getReadUnmappedFlag();
|
||||
}
|
||||
|
||||
// Map over the org.broadinstitute.sting.atk.LocusContext
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
//System.out.println(read.getAttribute("NM"));
|
||||
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
|
||||
if (editDist <= 50)
|
||||
|
|
|
|||
|
|
@ -12,12 +12,12 @@ public class BaseQualityDumpWalker extends ReadWalker<Integer, Integer> {
|
|||
protected final int MAX_TARGET_EDIT_DISTANCE = 4; //10;
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
// we only want aligned reads
|
||||
return !read.getReadUnmappedFlag();
|
||||
}
|
||||
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
|
||||
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
|
||||
|
||||
|
|
|
|||
|
|
@ -23,12 +23,12 @@ public class BaseQualityHistoWalker extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
for ( byte qual : read.getBaseQualities() ) {
|
||||
//System.out.println(qual);
|
||||
this.qualCounts[qual]++;
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
public class DisplayFourBaseReadWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
String bases = read.getReadString();
|
||||
|
||||
boolean displayed = false;
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ public class IOCrusherWalker extends ReadWalker<SAMRecord, ArrayList<SAMFileWrit
|
|||
/**
|
||||
*
|
||||
*/
|
||||
public SAMRecord map(LocusContext context, SAMRecord read) {
|
||||
public SAMRecord map(char[] ref, SAMRecord read) {
|
||||
nReadsRead++;
|
||||
return read;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
|
||||
total_reads++;
|
||||
|
||||
|
|
@ -84,7 +84,8 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
|||
ptWriter.receive(read); // make sure we still write the read to the output, we do not want to lose data!
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* 11 May 2009 - Commented out because we no longer have the full reference sequence for sanity checking.
|
||||
ReferenceSequence contig_seq = context.getReferenceContig();
|
||||
if ( read.getReferenceName() != cur_contig) {
|
||||
cur_contig = read.getReferenceName();
|
||||
|
|
@ -94,6 +95,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
|||
pileBuilder.setReferenceSequence(refstr);
|
||||
out.println("loaded contig "+cur_contig+" (index="+read.getReferenceIndex()+"); length="+contig_seq.getBases().length+" tst="+contig_seq.toString());
|
||||
}
|
||||
*/
|
||||
|
||||
// if ( cur_contig.equals("chrM") || GenomeLoc.compareContigs(cur_contig,"chrY") > 0 ) continue; // skip chrM and unplaced contigs for now
|
||||
|
||||
|
|
@ -138,10 +140,10 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
|||
*/
|
||||
|
||||
if ( ERR_MODE != null ) {
|
||||
if ( ERR_MODE.equals("MM")) err = numMismatches(read,contig_seq);
|
||||
else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,contig_seq);
|
||||
else if ( ERR_MODE.equals("ERR")) err = numErrors(read,contig_seq);
|
||||
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,contig_seq);
|
||||
if ( ERR_MODE.equals("MM")) err = numMismatches(read,ref);
|
||||
else if ( ERR_MODE.equals("MC") ) err = AlignmentUtils.numMismatches(read,ref);
|
||||
else if ( ERR_MODE.equals("ERR")) err = numErrors(read,ref);
|
||||
else if ( ERR_MODE.equals("MG")) err = numMismatchesGaps(read,ref);
|
||||
if ( err > MAX_ERRS ) {
|
||||
ptWriter.receive(read);
|
||||
discarded_maxerr_count++;
|
||||
|
|
@ -159,10 +161,10 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
|||
* @return number of errors (number of mismatches plus total length of all insertions/deletions
|
||||
* @throws RuntimeException
|
||||
*/
|
||||
private static int numErrors(SAMRecord r, ReferenceSequence refseq) throws RuntimeException {
|
||||
private static int numErrors(SAMRecord r, char[] ref) throws RuntimeException {
|
||||
|
||||
// NM currently stores the total number of mismatches in all blocks + 1
|
||||
int errs = numMismatches(r,refseq);
|
||||
int errs = numMismatches(r,ref);
|
||||
|
||||
// now we have to add the total length of all indels:
|
||||
Cigar c = r.getCigar();
|
||||
|
|
@ -187,10 +189,10 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
|||
* deletion will be counted as a single error regardless of the length)
|
||||
* @throws RuntimeException
|
||||
*/
|
||||
private static int numMismatchesGaps(SAMRecord r,ReferenceSequence refseq) throws RuntimeException {
|
||||
private static int numMismatchesGaps(SAMRecord r,char[] ref) throws RuntimeException {
|
||||
|
||||
// NM currently stores the total number of mismatches in all blocks + 1
|
||||
int errs = numMismatches(r,refseq);
|
||||
int errs = numMismatches(r,ref);
|
||||
|
||||
// now we have to add the total length of all indels:
|
||||
Cigar c = r.getCigar();
|
||||
|
|
@ -209,7 +211,7 @@ public class IndelInspector extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
/** This method is a HACK: it is designed to work around the current bug in NM tags created at CRD */
|
||||
private static int numMismatches(SAMRecord r, ReferenceSequence refseq) throws RuntimeException {
|
||||
private static int numMismatches(SAMRecord r, char[] refseq) throws RuntimeException {
|
||||
|
||||
// NM currently stores the total number of mismatches in all blocks + 1
|
||||
Integer i = (Integer)r.getAttribute("NM");
|
||||
|
|
|
|||
|
|
@ -24,18 +24,21 @@ public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval
|
|||
|
||||
public void initialize() {}
|
||||
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
return ( !read.getReadUnmappedFlag() && // mapped
|
||||
read.getReadLength() <= maxReadLength && // not too big
|
||||
read.getAlignmentBlocks().size() > 1); // indel
|
||||
}
|
||||
|
||||
public Interval map(LocusContext context, SAMRecord read) {
|
||||
public Interval map(char[] ref, SAMRecord read) {
|
||||
List<AlignmentBlock> blocks = read.getAlignmentBlocks();
|
||||
long indelLeftEdge = read.getAlignmentStart() + blocks.get(0).getLength() - 1;
|
||||
long indelRightEdge = read.getAlignmentEnd() - blocks.get(blocks.size()-1).getLength() + 1;
|
||||
GenomeLoc indelLoc = new GenomeLoc(context.getLocation().getContigIndex(), indelLeftEdge, indelRightEdge);
|
||||
return new Interval(context.getLocation(), indelLoc);
|
||||
|
||||
GenomeLoc indelLoc = new GenomeLoc(read.getReferenceIndex(), indelLeftEdge, indelRightEdge);
|
||||
GenomeLoc refLoc = new GenomeLoc(read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
|
||||
|
||||
return new Interval(refLoc, indelLoc);
|
||||
}
|
||||
|
||||
public Interval reduceInit() {
|
||||
|
|
|
|||
|
|
@ -90,7 +90,7 @@ public class LogisticRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWr
|
|||
}
|
||||
|
||||
|
||||
public SAMRecord map(LocusContext context, SAMRecord read) {
|
||||
public SAMRecord map(char[] ref, SAMRecord read) {
|
||||
SAMRecord recalRead = read;
|
||||
byte[] bases = read.getReadBases();
|
||||
byte[] quals = read.getBaseQualities();
|
||||
|
|
|
|||
|
|
@ -13,20 +13,18 @@ import java.util.List;
|
|||
|
||||
@WalkerName("CountMismatches")
|
||||
public class MismatchCounterWalker extends ReadWalker<Integer, Integer> {
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
int nMismatches = 0;
|
||||
|
||||
ReferenceSequence refseq = context.getReferenceContig();
|
||||
|
||||
int start = read.getAlignmentStart()-1;
|
||||
int stop = read.getAlignmentEnd();
|
||||
// sometimes BWA outputs screwy reads
|
||||
if ( stop >= context.getReferenceContig().getBases().length )
|
||||
if ( stop - start > ref.length )
|
||||
return 0;
|
||||
|
||||
if ( read.getAlignmentBlocks().size() == 1 ) {
|
||||
// No indels
|
||||
List<Byte> refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop);
|
||||
List<Byte> refSeq = Utils.subseq(ref);
|
||||
List<Byte> readBases = Utils.subseq(read.getReadBases());
|
||||
|
||||
assert(refSeq.size() == readBases.size());
|
||||
|
|
|
|||
|
|
@ -19,12 +19,12 @@ public class MismatchHistoWalker extends ReadWalker<Integer, Integer> {
|
|||
protected final int MAX_TARGET_EDIT_DISTANCE = 10;
|
||||
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
// we only want aligned reads
|
||||
return !read.getReadUnmappedFlag();
|
||||
}
|
||||
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
|
||||
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
|
||||
|
||||
|
|
@ -33,20 +33,18 @@ public class MismatchHistoWalker extends ReadWalker<Integer, Integer> {
|
|||
editDist >= MIN_TARGET_EDIT_DISTANCE &&
|
||||
editDist <= MAX_TARGET_EDIT_DISTANCE ) {
|
||||
|
||||
ReferenceSequence refseq = context.getReferenceContig();
|
||||
|
||||
int start = read.getAlignmentStart()-1;
|
||||
int stop = read.getAlignmentEnd();
|
||||
// sometimes BWA outputs screwy reads
|
||||
if ( stop >= context.getReferenceContig().getBases().length )
|
||||
if ( stop - start > ref.length )
|
||||
return 0;
|
||||
|
||||
List<Byte> refSeq = Utils.subseq(context.getReferenceContig().getBases(), start, stop);
|
||||
List<Byte> refSeq = Utils.subseq(ref);
|
||||
List<Byte> readBases = Utils.subseq(read.getReadBases());
|
||||
assert(refSeq.size() == readBases.size());
|
||||
|
||||
// it's actually faster to reallocate a resized array than to use ArrayLists...
|
||||
if ( refSeq.size() > mismatchCounts.length ) {
|
||||
if ( ref.length > mismatchCounts.length ) {
|
||||
int oldLength = mismatchCounts.length;
|
||||
mismatchCounts = (long[])resizeArray(mismatchCounts, refSeq.size());
|
||||
for ( int i = oldLength; i < refSeq.size(); i++ )
|
||||
|
|
|
|||
|
|
@ -27,12 +27,11 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
|||
/**
|
||||
* Ignore reads with indels or clipping
|
||||
*
|
||||
* @param context the reference context
|
||||
* @param read the read to assess
|
||||
* @return true if the read can be processed, false if it should be ignored
|
||||
*/
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
return (read.getCigar().numCigarElements() == 1 && read.getAlignmentStart() + read.getReadLength() < context.getReferenceContig().length());
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
return (read.getCigar().numCigarElements() == 1 && read.getReadLength() > ref.length);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -41,17 +40,15 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
|||
* of this array is always "true" so that we can figure out how many reads we
|
||||
* processed in a thread-safe manner.
|
||||
*
|
||||
* @param context the reference context
|
||||
* @param read the read to assess
|
||||
* @return An array of length (read_length + 1) indicating where the mismatches occur.
|
||||
* Last element is for internal use so the reduce() function can figure out how
|
||||
* many reads we processed.
|
||||
*/
|
||||
public boolean[] map(LocusContext context, SAMRecord read) {
|
||||
public boolean[] map(char[] ref, SAMRecord read) {
|
||||
boolean[] errorsPerCycle = new boolean[read.getReadLength() + 1];
|
||||
|
||||
byte[] bases = read.getReadBases();
|
||||
byte[] contig = context.getReferenceContig().getBases();
|
||||
byte[] sq = (byte[]) read.getAttribute("SQ");
|
||||
|
||||
if (printVisualHits) {
|
||||
|
|
@ -61,16 +58,16 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], int[]> {
|
|||
}
|
||||
System.out.println();
|
||||
|
||||
for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) {
|
||||
byte compBase = convertIUPACBaseToSimpleBase(contig[offset]);
|
||||
for (int cycle = 0; cycle < bases.length; cycle++) {
|
||||
byte compBase = convertIUPACBaseToSimpleBase((byte)ref[cycle]);
|
||||
|
||||
System.out.print((char) compBase);
|
||||
}
|
||||
System.out.println("\n");
|
||||
}
|
||||
|
||||
for (int cycle = 0, offset = (int) context.getPosition() - 1; cycle < bases.length; cycle++, offset++) {
|
||||
byte compBase = convertIUPACBaseToSimpleBase(contig[offset]);
|
||||
for (int cycle = 0; cycle < bases.length; cycle++) {
|
||||
byte compBase = convertIUPACBaseToSimpleBase((byte)ref[cycle]);
|
||||
|
||||
if (compBase != '.') {
|
||||
if (useNextBestBase || useNextRandomBase) {
|
||||
|
|
|
|||
|
|
@ -23,12 +23,12 @@ public class ReadFilterWalker extends ReadWalker<Integer,Integer> {
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean filter(LocusContext context, SAMRecord read) {
|
||||
public boolean filter(char[] ref, SAMRecord read) {
|
||||
return read.getReadLength() <= max_len;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
writer.addAlignment(read);
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class SAMToFastqAndSqWalker extends ReadWalker<Integer, Integer> {
|
|||
|
||||
private boolean ready = false;
|
||||
|
||||
public Integer map(LocusContext context, SAMRecord read) {
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
if (!ready) {
|
||||
try {
|
||||
fastqbw = new PrintWriter(FASTQFILE);
|
||||
|
|
|
|||
|
|
@ -50,6 +50,39 @@ public class AlignmentUtils {
|
|||
return mm_count;
|
||||
}
|
||||
|
||||
/**
|
||||
* mhanna - 11 May 2009 - stubbed out competing method that works with partial references.
|
||||
* Computes number of mismatches in the read alignment to the refence <code>ref</code>
|
||||
* specified in the record <code>r</code>. Indels are completely <i>ignored</i> by this method:
|
||||
* only base mismatches in the alignment segments where both sequences are present are counted.
|
||||
* @param r
|
||||
* @return
|
||||
*/
|
||||
public static int numMismatches(SAMRecord r, char[] ref) {
|
||||
if ( r.getReadUnmappedFlag() ) return 1000000;
|
||||
int i_ref = 0; // position on the ref
|
||||
int i_read = 0; // position on the read
|
||||
int mm_count = 0; // number of mismatches
|
||||
Cigar c = r.getCigar();
|
||||
for ( int k = 0 ; k < c.numCigarElements() ; k++ ) {
|
||||
CigarElement ce = c.getCigarElement(k);
|
||||
switch( ce.getOperator() ) {
|
||||
case M:
|
||||
for ( int l = 0 ; l < ce.getLength() ; l++, i_ref++, i_read++ ) {
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) == 'N' ) continue; // do not count N's ?
|
||||
if ( Character.toUpperCase(r.getReadString().charAt(i_read) ) !=
|
||||
Character.toUpperCase(ref[i_ref]) ) mm_count++;
|
||||
}
|
||||
break;
|
||||
case I: i_read += ce.getLength(); break;
|
||||
case D: i_ref += ce.getLength(); break;
|
||||
default: throw new RuntimeException("Unrecognized cigar element");
|
||||
}
|
||||
|
||||
}
|
||||
return mm_count;
|
||||
}
|
||||
|
||||
// IMPORTANT NOTE: ALTHOUGH THIS METHOD IS EXTREMELY SIMILAR TO THE ONE ABOVE, WE NEED
|
||||
// TWO SEPARATE IMPLEMENTATIONS IN ORDER TO PREVENT JAVA STRINGS FROM FORCING US TO
|
||||
// PERFORM EXPENSIVE ARRAY COPYING WHEN TRYING TO GET A BYTE ARRAY...
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import edu.mit.broad.picard.reference.ReferenceSequenceFile;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -119,14 +120,20 @@ public class Utils {
|
|||
return c;
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> subseq(char[] fullArray) {
|
||||
byte[] fullByteArray = new byte[fullArray.length];
|
||||
StringUtil.charsToBytes(fullArray,0,fullArray.length,fullByteArray,0);
|
||||
return subseq(fullByteArray);
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> subseq(byte[] fullArray) {
|
||||
return subseq(fullArray, 0, fullArray.length);
|
||||
return subseq(fullArray, 0, fullArray.length-1);
|
||||
}
|
||||
|
||||
public static ArrayList<Byte> subseq(byte[] fullArray, int start, int end) {
|
||||
assert end < fullArray.length;
|
||||
ArrayList<Byte> dest = new ArrayList<Byte>(end - start + 1);
|
||||
for (int i = start; i < end; i++) {
|
||||
for (int i = start; i <= end; i++) {
|
||||
dest.add(fullArray[i]);
|
||||
}
|
||||
return dest;
|
||||
|
|
|
|||
Loading…
Reference in New Issue