adding the initial ROD for Reads support; more convenience methods in ReadMetaDataTracker to come.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2918 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-03-03 15:56:44 +00:00
parent 0e9a6826b0
commit 790d2a7776
45 changed files with 854 additions and 44 deletions

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.alignment;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.StingException;
@ -58,7 +59,7 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
* @return Number of reads aligned by this map (aka 1).
*/
@Override
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
//logger.info(String.format("examining read %s", read.getReadName()));
byte[] bases = read.getReadBases();

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.alignment;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
@ -83,7 +84,7 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
* @return Number of alignments found for this read.
*/
@Override
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
SAMRecord alignedRead = aligner.align(read,header);
if (outputBam != null) {
outputBam.addAlignment(alignedRead);

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.alignment;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
@ -49,7 +50,7 @@ public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
* @return Number of alignments found for this read.
*/
@Override
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
if(alignmentIterator.hasNext()) {
int numAlignments = alignmentIterator.next().length;

View File

@ -0,0 +1,181 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.*;
/**
 * A reference-ordered-data (ROD) view for reads. Read traversals use it to obtain a
 * {@link ReadMetaDataTracker} for each read they process.
 */
public class ReadBasedReferenceOrderedView implements View {
    // the window of ROD data that every tracker handed out by this view is built from
    private final WindowedData window;

    /**
     * Create the view on top of a shard's data and register it with that provider.
     *
     * @param provider the shard data provider supplying the reference ordered data
     */
    public ReadBasedReferenceOrderedView(ShardDataProvider provider) {
        this.window = new WindowedData(provider);
        provider.register(this);
    }

    /**
     * for testing only please
     *
     * @param data the window provider
     */
    ReadBasedReferenceOrderedView(WindowedData data) {
        this.window = data;
    }

    /**
     * Get the ROD tracker for a single read; simply delegates to the underlying window.
     *
     * @param read the read to fetch reference ordered data for
     *
     * @return the tracker the window builds for this read
     */
    public ReadMetaDataTracker getReferenceOrderedDataForRead(SAMRecord read) {
        ReadMetaDataTracker tracker = window.getTracker(read);
        return tracker;
    }

    /**
     * @return the view classes this view conflicts with; currently only ManagingReferenceOrderedView
     */
    public Collection<Class<? extends View>> getConflictingViews() {
        List<Class<? extends View>> conflicts = new ArrayList<Class<? extends View>>();
        conflicts.add(ManagingReferenceOrderedView.class);
        return conflicts;
    }

    /** Close the underlying window, if there is one. */
    public void close() {
        if (window == null) {
            return;
        }
        window.close();
    }
}
/**
 * Stores a window of reference ordered data (ROD), dropping RODs once we've passed them.
 * <p/>
 * The window is a queue keyed by ROD start position. Every call to {@link #getTracker(SAMRecord)}
 * first slides the window to the given read and then hands the live queue (not a copy) to the
 * new tracker, so the contents a tracker sees change on the next call.
 */
class WindowedData {
    // the queue of possibly in-frame RODs, keyed by ROD start position; RODs are removed as soon as they are out of scope
    private final TreeMap<Long, Set<ReferenceOrderedDatum>> mapping = new TreeMap<Long, Set<ReferenceOrderedDatum>>();

    // our current location from the last read we processed
    private GenomeLoc currentLoc;

    // the reference (contig) index of the last read we processed; null until the first read is seen
    private Integer currentReferenceIndex;

    // a list of the RMDDataState (data source -> iterator), one entry per ROD data source
    private List<RMDDataState> states;

    // the provider; where we get all our information (null when built by the testing constructor)
    private final ShardDataProvider provider;

    /**
     * create a WindowedData given a shard provider
     *
     * @param provider the ShardDataProvider
     */
    public WindowedData(ShardDataProvider provider) {
        this.provider = provider;
    }

    /**
     * this function is for testing only
     *
     * @param states a list of RMDDataState to initialize with
     */
    WindowedData(List<RMDDataState> states) {
        this.states = states;
        provider = null;
    }

    /**
     * load the states dynamically, since the only way to get a genome loc is from the read (the shard doesn't have one).
     * One state is created per ROD data source, with its iterator obtained by seeking to the read's alignment start.
     * If there is no provider, or it has no reference ordered data, the state list is left empty.
     *
     * @param provider the ShardDataProvider
     * @param rec      the current read
     */
    private void getStates(ShardDataProvider provider, SAMRecord rec) {
        states = new ArrayList<RMDDataState>();
        if (provider != null && provider.getReferenceOrderedData() != null)
            for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
                states.add(new RMDDataState(dataSource, ((FlashBackIterator) dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart())))));
    }

    /**
     * create a ReadMetaDataTracker given the current read
     *
     * @param rec the read
     *
     * @return a ReadMetaDataTracker for the read, from which you can get ROD -> read alignments
     */
    public ReadMetaDataTracker getTracker(SAMRecord rec) {
        updatePosition(rec);
        return new ReadMetaDataTracker(rec, mapping);
    }

    /**
     * update the position we're storing: drop the RODs we have passed and pull in the RODs
     * that overlap the new read.
     *
     * @param rec the read to use for start and end
     */
    private void updatePosition(SAMRecord rec) {
        if (states == null) getStates(this.provider, rec);

        // The queue is keyed by start position alone, so anything left over from a different
        // contig would be indistinguishable from data on this one; throw it all away when the
        // read's reference index changes.
        Integer referenceIndex = rec.getReferenceIndex();
        if (currentReferenceIndex != null && !currentReferenceIndex.equals(referenceIndex))
            mapping.clear();
        currentReferenceIndex = referenceIndex;

        currentLoc = GenomeLocParser.createGenomeLoc(rec);

        // flush the queue looking for records we've passed over
        while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart())
            mapping.pollFirstEntry(); // toss away records that we've passed

        // add new data to the queue
        for (RMDDataState state : states) {
            // move into position
            while (state.iterator.hasNext() && state.iterator.peekNextLocation().isBefore(currentLoc))
                state.iterator.next();

            while (state.iterator.hasNext() && state.iterator.peekNextLocation().overlapsP(currentLoc)) {
                RODRecordList list = state.iterator.next();
                long start = list.getLocation().getStart();
                // looked up lazily so that an empty record list never creates an empty bucket
                Set<ReferenceOrderedDatum> bucket = null;
                for (ReferenceOrderedDatum datum : list) {
                    if (bucket == null) {
                        bucket = mapping.get(start);
                        if (bucket == null) {
                            bucket = new HashSet<ReferenceOrderedDatum>();
                            mapping.put(start, bucket);
                        }
                    }
                    bucket.add(datum);
                }
            }
        }
    }

    /** Closes the current view, handing each FlashBackIterator back to the data source it came from. */
    public void close() {
        if (states == null) return;
        for (RMDDataState state : states)
            if (state.iterator instanceof FlashBackIterator) state.dataSource.close((FlashBackIterator) state.iterator);

        // Forget the closed iterators. Note that a later getTracker() call sees states == null
        // and builds a fresh set of states rather than failing.
        states = null;
    }
}
/**
 * Models the traversal state of a given ROD lane: a data source together with the
 * iterator currently open on it.
 */
class RMDDataState {
    // the source the iterator belongs to; WindowedData.close() hands the iterator back to it
    public final ReferenceOrderedDataSource dataSource;

    // the iterator positioned within that source
    public final LocationAwareSeekableRODIterator iterator;

    /**
     * Pair a data source with an iterator over it.
     *
     * @param dataSource the ROD data source
     * @param iterator   the iterator over that source's data
     */
    public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
        this.iterator = iterator;
        this.dataSource = dataSource;
    }
}

View File

@ -72,6 +72,20 @@ public class ReferenceOrderedDataSource implements SimpleDataSource {
return RODIterator;
}
/**
 * Seek to the specified position and return an iterator through the data.
 * The position is wrapped in a MappedStreamSegment and the iterator is
 * requested from the iterator pool.
 *
 * @param loc GenomeLoc that points to the selected position.
 *
 * @return Iterator through the data.
 */
public Iterator seek(GenomeLoc loc) {
    // keep the DataStreamSegment static type so the same iteratorPool.iterator(...) overload is selected
    DataStreamSegment segment = new MappedStreamSegment(loc);
    return iteratorPool.iterator(segment);
}
/**
* Close the specified iterator, returning it to the pool.
* @param iterator Iterator to close.
@ -116,10 +130,10 @@ class ReferenceOrderedDataPool extends ResourcePool<FlashBackIterator, FlashBack
if( (RODIterator.position() == null && RODIterator.hasNext()) ||
(RODIterator.position() != null && RODIterator.position().isBefore(position)) )
return RODIterator;
// if (RODIterator.position() != null && RODIterator.canFlashBackTo(position)) {
// RODIterator.flashBackTo(position);
// return RODIterator;
// }
if (RODIterator.position() != null && RODIterator.canFlashBackTo(position)) {
RODIterator.flashBackTo(position);
return RODIterator;
}
}
return null;

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Iterator;
import java.util.List;
/**
 * Interface PeekableRODIterator
 * <p/>
 * The methods attached to a peekable ROD iterator: plain iteration over lists of
 * {@link ReferenceOrderedDatum}, plus the ability to look at the upcoming location
 * and to seek forward to an interval.
 *
 * @author aaron
 */
public interface PeekableRODIterator extends Iterator<List<ReferenceOrderedDatum>> {

    /**
     * @return the location of the upcoming data (presumably without advancing the iterator -- confirm against implementations)
     */
    GenomeLoc peekNextLocation();

    /**
     * Seek forward to the given interval.
     *
     * @param interval the interval to seek to
     *
     * @return a record list for the interval (exact contents are implementation-defined -- confirm against implementations)
     */
    RODRecordList seekForward(GenomeLoc interval);
}

View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.*;
/**
 * Class ReadMetaDataTracker
 * <p/>
 * A read-based meta data tracker: given a read and a queue of reference ordered data
 * keyed by start position, it reports which of those data fall on the read, either by
 * offset into the read or by genomic position.
 * <p/>
 * The tracker keeps a reference to the mapping it is given (no copy is made), and the
 * sets it returns are the same set instances stored in that mapping.
 *
 * @author aaron
 */
public class ReadMetaDataTracker {
    private final SAMRecord record;
    private final TreeMap<Long, Set<ReferenceOrderedDatum>> mapping;

    /**
     * create a read meta data tracker, given the read and a queue of RODatum positions
     *
     * @param record  the read to create offset from
     * @param mapping the mapping of reference ordered datum, keyed by start position
     */
    public ReadMetaDataTracker(SAMRecord record, TreeMap<Long, Set<ReferenceOrderedDatum>> mapping) {
        this.record = record;
        this.mapping = mapping;
    }

    /**
     * create an alignment of read position to reference ordered datum. The offset is simply the
     * datum position minus the read's start; only offsets in [0, read length) are kept.
     *
     * @param record the SAMRecord
     * @param queue  the queue (as a tree map keyed by position)
     * @param cl     the class to filter by, null if not filtered by class; NOTE: not applied yet
     * @param name   the datum track name, null if not filtered by name; NOTE: not applied yet
     *
     * @return a mapping from the position in the read to the reference ordered datum
     */
    private Map<Integer, Set<ReferenceOrderedDatum>> createReadAlignment(SAMRecord record, TreeMap<Long, Set<ReferenceOrderedDatum>> queue, Class<?> cl, String name) {
        Map<Integer, Set<ReferenceOrderedDatum>> ret = new LinkedHashMap<Integer, Set<ReferenceOrderedDatum>>();
        GenomeLoc location = GenomeLocParser.createGenomeLoc(record);
        int length = record.getReadLength();
        for (Map.Entry<Long, Set<ReferenceOrderedDatum>> entry : queue.entrySet()) {
            long position = entry.getKey() - location.getStart();
            // keys come back in ascending order, so nothing after this one can fall on the read
            if (position >= length) break;
            if (position >= 0) ret.put((int) position, entry.getValue());
        }
        return ret;
    }

    /**
     * create an alignment of genomic position to reference ordered datum. Only positions within
     * the read's alignment start and alignment end (both inclusive) are kept.
     *
     * @param record  the SAMRecord
     * @param mapping the queue (as a tree map keyed by position)
     * @param cl      the class to filter by, null if not filtered by class; NOTE: not applied yet
     * @param name    the datum track name, null if not filtered by name; NOTE: not applied yet
     *
     * @return a mapping from the genomic position to the reference ordered datum
     */
    private Map<Long, Set<ReferenceOrderedDatum>> createGenomeLocAlignment(SAMRecord record, TreeMap<Long, Set<ReferenceOrderedDatum>> mapping, Class<?> cl, String name) {
        Map<Long, Set<ReferenceOrderedDatum>> ret = new LinkedHashMap<Long, Set<ReferenceOrderedDatum>>();
        int start = record.getAlignmentStart();
        int stop = record.getAlignmentEnd();
        for (Map.Entry<Long, Set<ReferenceOrderedDatum>> entry : mapping.entrySet()) {
            long location = entry.getKey();
            // keys come back in ascending order, so nothing after this one can be within the read
            if (location > stop) break;
            if (location >= start) ret.put(entry.getKey(), entry.getValue());
        }
        return ret;
    }

    /**
     * get the position mapping, from read offset to ROD
     *
     * @return a mapping of read offset to ROD(s)
     */
    public Map<Integer, Set<ReferenceOrderedDatum>> getPositionMapping() {
        return createReadAlignment(record, mapping, null, null);
    }

    /**
     * get the genome loc mapping, from genomic position to ROD
     *
     * @return a mapping of genome loc position to ROD(s)
     */
    public Map<Long, Set<ReferenceOrderedDatum>> getGenomeLocMapping() {
        return createGenomeLocAlignment(record, mapping, null, null);
    }
}

View File

@ -1,3 +1,26 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.utils;
import org.broadinstitute.sting.utils.GenomeLoc;

View File

@ -1,3 +1,26 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.utils;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;

View File

@ -3,9 +3,11 @@ package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
@ -80,6 +82,9 @@ public class TraverseReads extends TraversalEngine {
ReadView reads = new ReadView(dataProvider);
ReadReferenceView reference = new ReadReferenceView(dataProvider);
// get the reference ordered data
ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
// while we still have more reads
for (SAMRecord read : reads) {
// an array of characters that represent the reference
@ -92,9 +97,12 @@ public class TraverseReads extends TraversalEngine {
// update the number of reads we've seen
TraversalStatistics.nRecords++;
// if the read is mapped, create a metadata tracker
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = readWalker.filter(refSeq, read);
if (keepMeP) {
M x = readWalker.map(refSeq, read);
M x = readWalker.map(refSeq, read, tracker); // the tracker can be null
sum = readWalker.reduce(x, sum);
}

View File

@ -4,6 +4,7 @@ import net.sf.samtools.*;
import net.sf.picard.reference.ReferenceSequenceFileFactory;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequence;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.Pair;
@ -141,7 +142,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipper, Cli
* @param read the read itself, as a SAMRecord
* @return the ReadClipper object describing what should be done to clip this read
*/
public ReadClipper map(char[] ref, SAMRecord read) {
public ReadClipper map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
ReadClipper clipper = new ReadClipper(read);

View File

@ -13,6 +13,7 @@ package org.broadinstitute.sting.gatk.walkers;
*/
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
@ -36,7 +37,7 @@ public class FixBAMSortOrderTag extends ReadWalker<SAMRecord, SAMFileWriter> {
public SAMFileHeader.SortOrder SORT_ORDER=SAMFileHeader.SortOrder.coordinate;
@Override
public SAMRecord map(char[] ref, SAMRecord read) {
public SAMRecord map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
return read;
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import java.text.DecimalFormat;
import java.text.NumberFormat;
@ -112,7 +113,7 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
private FlagStat myStat = new FlagStat();
public Integer map( char[] ref, SAMRecord read ) {
public Integer map( char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
myStat.readCount++;
if (read.getReadFailsVendorQualityCheckFlag()) {
myStat.QC_failure++;

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
/*
@ -90,7 +91,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
* @param read the read itself, as a SAMRecord
* @return the read itself
*/
public SAMRecord map( char[] ref, SAMRecord read ) {
public SAMRecord map( char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read;
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
/**
* Created by IntelliJ IDEA.
@ -23,5 +24,5 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
}
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
public abstract MapType map(char[] ref, SAMRecord read);
public abstract MapType map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker);
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
import org.apache.log4j.Logger;
@ -52,7 +53,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
logger.info("SplitSamFile version: " + VERSION);
}
public SAMRecord map(char[] ref, SAMRecord read) {
public SAMRecord map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
return read;
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.coverage;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -28,7 +29,7 @@ public class CoarseCoverageWalker extends ReadWalker<Integer,Integer> {
}
@Override
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( read.getReadUnmappedFlag() ||
read.getDuplicateReadFlag() ||

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.fasta;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.BaseUtils;
@ -33,7 +34,7 @@ public class BamToFastqWalker extends ReadWalker<Integer, Integer> {
}
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
out.println("@" + read.getReadName());
if ( !RE_REVERSE || !read.getReadNegativeStrandFlag() ) {
out.println(read.getReadString());

View File

@ -148,7 +148,7 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
@Override
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
// if ( read.getReadName().equals("428EFAAXX090610:2:36:1384:639#0") ) System.out.println("GOT READ");

View File

@ -226,7 +226,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
}
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( currentInterval == null ) {
emit(read);
return 0;
@ -278,7 +278,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
} while ( currentInterval != null && currentInterval.isBefore(readLoc) );
// call back into map now that the state has been updated
map(ref, read);
map(ref, read,metaDataTracker);
}
return 0;

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
@ -12,7 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
*/
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountReadsWalker extends ReadWalker<Integer, Integer> {
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker tracker) {
//System.out.println(read.format());
return 1;
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.qc;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.StingException;
import net.sf.samtools.SAMRecord;
@ -76,7 +77,7 @@ public class ReadValidationWalker extends ReadWalker<SAMRecord, SAMRecord> {
* @param read the read itself, as a SAMRecord
* @return the read itself
*/
public SAMRecord map( char[] ref, SAMRecord read ) {
public SAMRecord map( char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read;
}

View File

@ -18,6 +18,7 @@ import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
@ -314,7 +315,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
* @param read The read to be recalibrated
* @return The read with quality scores replaced
*/
public SAMRecord map( char[] refBases, SAMRecord read ) {
public SAMRecord map( char[] refBases, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
RecalDataManager.parseSAMRecord( read, RAC );

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
@ -27,7 +28,7 @@ public class AlignedReadsHistoWalker extends ReadWalker<Integer, Integer> {
return !read.getReadUnmappedFlag();
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
//System.out.println(read.getAttribute("NM"));
int editDist = Integer.parseInt(read.getAttribute("NM").toString());
if (editDist <= 50)

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
@ -326,7 +327,7 @@ public class DSBWalkerV3 extends ReadWalker<Integer,Integer> {
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( AlignmentUtils.isReadUnmapped(read) ) return 0;

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Utils;
@ -37,7 +38,7 @@ public class IOCrusherWalker extends ReadWalker<SAMRecord, ArrayList<SAMFileWrit
/**
*
*/
public SAMRecord map(char[] ref, SAMRecord read) {
public SAMRecord map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
nReadsRead++;
return read;
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.Utils;
@ -9,7 +10,7 @@ import java.util.List;
@WalkerName("CountMismatches")
public class MismatchCounterWalker extends ReadWalker<Integer, Integer> {
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
int nMismatches = 0;
int start = read.getAlignmentStart()-1;

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.Utils;
@ -21,7 +22,7 @@ public class MismatchHistoWalker extends ReadWalker<Integer, Integer> {
return !read.getReadUnmappedFlag();
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
int editDist = Integer.parseInt(read.getAttribute("NM").toString());

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.QualityUtils;
@ -35,7 +36,7 @@ public class PairedQualityScoreCountsWalker extends ReadWalker<Pair<byte[],Boole
return reduceCounts;
}
public Pair<byte[],Boolean> map( char[] ref, SAMRecord read) {
public Pair<byte[],Boolean> map( char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( canUseRead(read) ) {
return getCorrectlyOrientedBaseQualities(read);
} else {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.QualityUtils;
@ -46,7 +47,7 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], ReadErrorRateColl
* Last element is for internal use so the reduce() function can figure out how
* many reads we processed.
*/
public boolean[] map(char[] ref, SAMRecord read) {
public boolean[] map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
boolean[] errorsPerCycle = new boolean[read.getReadLength() + 1];
byte[] bases = read.getReadBases();

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -69,7 +70,7 @@ public class ReadQualityScoreWalker extends ReadWalker<SAMRecord, SAMFileWriter>
protected static BufferedReader inputReader = null;
protected static String line = null;
public SAMRecord map( char[] ref, SAMRecord read ) {
public SAMRecord map( char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read; // all the work is done in the reduce step for this walker
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.Pair;
@ -64,7 +65,7 @@ public class ReplaceQuals extends ReadWalker<SAMRecord, SAMFileWriter> {
/**
*
*/
public SAMRecord map(char[] ref, SAMRecord read) {
public SAMRecord map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
final String name = read.getReadName();
if ( readNameToPairs.containsKey(name) ) {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.alignment.bwa.BWAAligner;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
@ -106,7 +107,7 @@ public class TestReadFishingWalker extends ReadWalker<Integer,Long> {
}
@Override
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
Alignment bestAlignment = aligner.getBestAlignment(read.getReadBases());
System.out.println("bestAlignment = " + bestAlignment);
return 1;

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -76,7 +77,7 @@ public class CalculateAlleleLikelihoodsWalker extends ReadWalker<Integer, Intege
return 0;
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
HLAnamesAL.add(read.getReadName());
HLAreadsAL.add(formatter.FormatRead(read.getCigarString(), read.getReadString()));
HLAstartposAL.add(read.getAlignmentStart());

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -154,7 +155,7 @@ public class CalculatePhaseLikelihoodsWalker extends ReadWalker<Integer, Integer
return isWithinInterval;
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if (!ReadsToDiscard.contains(read.getReadName())){
UpdateCorrelation(read);
}else{

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import java.util.Hashtable;
/**
@ -36,7 +37,7 @@ public class CreateHaplotypesWalker extends ReadWalker<Integer, Integer> {
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
int readstart = read.getAlignmentStart();
int readstop = read.getAlignmentEnd();

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -177,7 +178,7 @@ public class CreatePedFileWalker extends ReadWalker<Integer, Integer> {
}
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
HLAnamesAL.add(read.getReadName());
HLAreadsAL.add(formatter.FormatRead(read.getCigarString(), read.getReadString()));
HLAstartposAL.add(read.getAlignmentStart());

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -231,7 +232,7 @@ public class FindClosestAlleleWalker extends ReadWalker<Integer, Integer> {
return maxFreq;
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
//Calculate concordance for this read and all overlapping reads
double maxConcordance = CalculateConcordance(read);

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import java.io.FileInputStream;
@ -134,7 +135,7 @@ public class ImputeAllelesWalker extends ReadWalker<Integer, Integer> {
}
public Integer map(char[] ref, SAMRecord read) {
public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
int readstart = read.getAlignmentStart();
int readstop = read.getAlignmentEnd();
int startimputation = 0, stopimputation = 0;

View File

@ -166,7 +166,7 @@ public class GraphReferenceAssessor extends ReadWalker<Integer, Integer> {
return minNMM;
}
public Integer map(char[] refArg, SAMRecord read) {
public Integer map(char[] refArg, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( MAXREADS-- == 0 ) {
System.exit(0);

View File

@ -112,7 +112,7 @@ public class ArtificialReadsTraversal extends TraversalEngine {
final boolean keepMeP = readWalker.filter(refSeq, read);
if (keepMeP) {
M x = readWalker.map(refSeq, read);
M x = readWalker.map(refSeq, read,null); // TODO: fix me at some point, it would be nice to fake out ROD data too
sum = readWalker.reduce(x, sum);
}

View File

@ -154,6 +154,11 @@ public class ArtificialSAMUtils {
elements.add(new CigarElement(length, CigarOperator.characterToEnum('M')));
record.setCigar(new Cigar(elements));
record.setProperPairFlag(false);
// TODO: add to code after checking why it breaks the clean read injector test
/*byte[] c = new byte[length];
for (int x = 0; x < length; x++)
c[x] = 'A'; */
//record.setReadBases(c);
if (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
record.setReadUmappedFlag(true);
}
@ -247,4 +252,3 @@ public class ArtificialSAMUtils {
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
}
}

View File

@ -0,0 +1,182 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTrackerTest;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.*;
/**
 * Tests for the ReadBasedReferenceOrderedView class, driven by a synthetic
 * ROD iterator ({@link FakePeekingRODIterator}).
 *
 * @author aaron
 */
public class ReadBasedReferenceOrderedViewTest extends BaseTest {
    private static int startingChr = 1;
    private static int endingChr = 2;
    private static int readCount = 100;
    private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
    private static SAMFileHeader header;

    /** Builds the artificial SAM header once and registers its sequence dictionary with the GenomeLocParser. */
    @BeforeClass
    public static void beforeClass() {
        header = ArtificialSAMUtils.createArtificialSamHeader(
                (endingChr - startingChr) + 1,
                startingChr,
                readCount + DEFAULT_READ_LENGTH);
        GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
    }

    /** No per-test setup is currently required. */
    @Before
    public void beforeEach() {
    }

    /**
     * Creates ten 10-base reads (fourth argument to createArtificialRead running 1..10),
     * then checks that the tracker built for each read maps exactly ten positions,
     * each holding exactly one ROD.
     */
    @Test
    public void testCreateReadMetaDataTrackerOnePerSite() {
        final int readLength = 10;

        // make ten reads, each filled with 'A' bases
        List<SAMRecord> reads = new ArrayList<SAMRecord>();
        for (int readStart = 1; readStart <= 10; readStart++) {
            SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "name", 0, readStart, readLength);
            byte[] bases = new byte[readLength];
            Arrays.fill(bases, (byte) 'A');
            read.setReadBases(bases);
            reads.add(read);
        }

        // a single fake ROD source, seeded at location (0, 0, 0)
        GenomeLoc origin = GenomeLocParser.createGenomeLoc(0, 0, 0);
        List<RMDDataState> states = new ArrayList<RMDDataState>();
        states.add(new RMDDataState(null, new FakePeekingRODIterator(origin)));
        ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(states));

        for (SAMRecord read : reads) {
            Map<Integer, Set<ReferenceOrderedDatum>> byPosition =
                    view.getReferenceOrderedDataForRead(read).getPositionMapping();
            for (Set<ReferenceOrderedDatum> rodsAtPosition : byPosition.values()) {
                Assert.assertEquals(1, rodsAtPosition.size());
            }
            Assert.assertEquals(10, byPosition.size());
        }
    }
}
/**
 * Test stand-in for a ROD iterator. It never runs out of data: every call to
 * {@link #next()} returns a one-element record list holding a
 * {@link ReadMetaDataTrackerTest.FakeRODatum} at the current location, and then
 * moves the current location forward by one.
 */
class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
    // location of the datum that the next call to next() will produce
    private GenomeLoc location;

    /**
     * @param startingLoc the iterator's first location is this location with start and stop each shifted by +1
     */
    public FakePeekingRODIterator(GenomeLoc startingLoc) {
        this.location = shiftByOne(startingLoc);
    }

    /** @return the location of the next datum, without advancing */
    @Override
    public GenomeLoc peekNextLocation() {
        System.err.println("Peek Next -> " + location);
        return location;
    }

    /** @return the current location (the same value {@link #peekNextLocation()} reports) */
    @Override
    public GenomeLoc position() {
        return location;
    }

    /**
     * Discards records while the current location is before {@code interval}, then
     * returns the next record.
     *
     * @param interval the location to seek to
     * @return the record produced by the first {@link #next()} call made once the
     *         current location is no longer before {@code interval}
     */
    @Override
    public RODRecordList seekForward(GenomeLoc interval) {
        while (location.isBefore(interval)) {
            next();
        }
        return next(); // we always move by one, we know the next location will be right
    }

    /** @return always {@code true}; this fake source is unbounded */
    @Override
    public boolean hasNext() {
        return true; // we always have next
    }

    /**
     * @return a single-element list holding a fake datum at the current location;
     *         the current location is advanced by one as a side effect
     */
    @Override
    public RODRecordList next() {
        System.err.println("Next -> " + location);
        ReadMetaDataTrackerTest.FakeRODatum datum = new ReadMetaDataTrackerTest.FakeRODatum(location);
        location = shiftByOne(location);
        FakeRODRecordList records = new FakeRODRecordList();
        records.add(datum);
        return records;
    }

    /**
     * Removal is not supported by this fake iterator.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        // UnsupportedOperationException is the conventional signal for an unsupported
        // remove(); IllegalStateException would wrongly suggest next() had not been called.
        throw new UnsupportedOperationException("FakePeekingRODIterator does not support remove()");
    }

    /** Returns a location on the same contig with start and stop each increased by one. */
    private static GenomeLoc shiftByOne(GenomeLoc loc) {
        return GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart() + 1, loc.getStop() + 1);
    }
}
/**
 * Minimal list-backed {@link RODRecordList} used only by the tests in this file.
 * Its location and ordering are taken from the first element, so
 * {@link #getLocation()} and {@link #compareTo(RODRecordList)} fail on an empty list.
 */
class FakeRODRecordList extends AbstractList<ReferenceOrderedDatum> implements RODRecordList {
    // backing storage for the records
    private final List<ReferenceOrderedDatum> records = new ArrayList<ReferenceOrderedDatum>();

    /** Appends {@code data} to the backing list. */
    @Override
    public boolean add(ReferenceOrderedDatum data) {
        return records.add(data);
    }

    @Override
    public ReferenceOrderedDatum get(int i) {
        return records.get(i);
    }

    @Override
    public int size() {
        return records.size();
    }

    /** @return the location of the first record in the list */
    @Override
    public GenomeLoc getLocation() {
        ReferenceOrderedDatum first = records.get(0);
        return first.getLocation();
    }

    /** @return the fixed track name {@code "test"} */
    @Override
    public String getName() {
        return "test";
    }

    /** Orders record lists by comparing this list's first-record location with the other list's location. */
    @Override
    public int compareTo(RODRecordList rodRecordList) {
        GenomeLoc mine = records.get(0).getLocation();
        GenomeLoc theirs = rodRecordList.getLocation();
        return mine.compareTo(theirs);
    }
}

View File

@ -0,0 +1,189 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.TreeMap;
/**
 * Tests for {@link ReadMetaDataTracker}, using a 10-base artificial read and
 * fake RODs placed at regular intervals along it.
 *
 * @author aaron
 */
public class ReadMetaDataTrackerTest extends BaseTest {
    private static int startingChr = 1;
    private static int endingChr = 2;
    private static int readCount = 100;
    private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
    private static SAMFileHeader header;

    /** Builds the artificial SAM header once and registers its sequence dictionary with the GenomeLocParser. */
    @BeforeClass
    public static void beforeClass() {
        header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
        GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
    }

    /** No per-test setup is currently required. */
    @Before
    public void beforeEach() {
    }

    /** With a ROD at every position, the position mapping must have ten entries of one ROD each. */
    @Test
    public void rodAtEachReadBase() {
        ReadMetaDataTracker tracker = getRMDT(1);

        // count the positions
        int count = 0;
        for (int x : tracker.getPositionMapping().keySet()) {
            count++;
            Assert.assertEquals(1, tracker.getPositionMapping().get(x).size());
        }
        Assert.assertEquals(10, count);
    }

    /** With a ROD at every seventh position, the position mapping must have two entries of one ROD each. */
    @Test
    public void sparceRODsForRead() {
        ReadMetaDataTracker tracker = getRMDT(7);

        // count the positions
        int count = 0;
        for (int x : tracker.getPositionMapping().keySet()) {
            count++;
            Assert.assertEquals(1, tracker.getPositionMapping().get(x).size());
        }
        Assert.assertEquals(2, count);
    }

    /** With a ROD at every position, the genome-location mapping must have ten entries of one ROD each. */
    @Test
    public void rodByGenomeLoc() {
        ReadMetaDataTracker tracker = getRMDT(1);

        // count the positions
        int count = 0;
        for (Long x : tracker.getGenomeLocMapping().keySet()) {
            count++;
            Assert.assertEquals(1, tracker.getGenomeLocMapping().get(x).size());
        }
        Assert.assertEquals(10, count);
    }

    /**
     * Builds a tracker for a 10-base artificial read with one fake ROD placed every
     * {@code incr} positions, beginning at the read's alignment start and not going
     * past its alignment end.
     *
     * @param incr spacing between consecutive RODs; must be at least 1
     * @return a tracker over the artificial read and the generated ROD data
     * @throws IllegalArgumentException if {@code incr} is less than 1
     */
    private ReadMetaDataTracker getRMDT(int incr) {
        if (incr < 1) {
            // a non-positive step would never reach the end of the read
            throw new IllegalArgumentException("incr must be >= 1, but was " + incr);
        }

        SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10);
        byte[] c = new byte[10];
        for (int x = 0; x < 10; x++) {
            c[x] = 'A';
        }
        record.setReadBases(c);

        TreeMap<Long, Set<ReferenceOrderedDatum>> data = new TreeMap<Long, Set<ReferenceOrderedDatum>>();
        // Walk absolute positions from alignment start to alignment end. Comparing a
        // zero-based offset against the absolute end coordinate only gives the right
        // number of sites when the read happens to start at position 1.
        final int alignmentEnd = record.getAlignmentEnd();
        for (int pos = record.getAlignmentStart(); pos <= alignmentEnd; pos += incr) {
            GenomeLoc loc = GenomeLocParser.createGenomeLoc(record.getReferenceIndex(), pos, pos);
            Set<ReferenceOrderedDatum> set = new HashSet<ReferenceOrderedDatum>();
            set.add(new FakeRODatum(loc));
            data.put((long) pos, set);
        }
        return new ReadMetaDataTracker(record, data);
    }

    /** for testing only: a ROD that carries nothing but a fixed location */
    static public class FakeRODatum implements ReferenceOrderedDatum {
        final GenomeLoc location;

        /**
         * @param location the location this datum reports from {@link #getLocation()}
         */
        public FakeRODatum(GenomeLoc location) {
            this.location = location;
        }

        /** @return the fixed name {@code "false"} */
        @Override
        public String getName() {
            return "false";
        }

        /** Does no parsing. @return always {@code false} */
        @Override
        public boolean parseLine(Object header, String[] parts) throws IOException {
            return false;
        }

        /** @return always the empty string */
        @Override
        public String toSimpleString() {
            return "";
        }

        /** @return always the empty string */
        @Override
        public String repl() {
            return "";
        }

        /**
         * Used by the ROD system to determine how to split input lines
         *
         * @return Regex string delimiter separating fields; always the empty string here
         */
        @Override
        public String delimiterRegex() {
            return "";
        }

        /** @return the location supplied at construction */
        @Override
        public GenomeLoc getLocation() {
            return location;
        }

        /** Orders datums by location. */
        @Override
        public int compareTo(ReferenceOrderedDatum that) {
            return location.compareTo(that.getLocation());
        }

        /**
         * Backdoor hook to read header, meta-data, etc. associated with the file. Will be
         * called by the ROD system before streaming starts
         *
         * @param source source data file on disk from which this rod stream will be pulled
         *
         * @return a header object that will be passed to parseLine command; always {@code null} here
         */
        @Override
        public Object initialize(File source) throws FileNotFoundException {
            return null;
        }
    }
}

View File

@ -76,7 +76,7 @@ public class PrintReadsWalkerTest extends BaseTest {
public void testNullRead() {
PrintReadsWalker walker = new PrintReadsWalker();
SAMRecord rec = walker.map(bases, null);
SAMRecord rec = walker.map(bases, null, null);
assertTrue(rec == null);
}
@ -86,7 +86,7 @@ public class PrintReadsWalkerTest extends BaseTest {
PrintReadsWalker walker = new PrintReadsWalker();
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
SAMRecord ret = walker.map(bases, rec);
SAMRecord ret = walker.map(bases, rec,null);
assertTrue(ret == rec);
assertTrue(ret.getReadName().equals(rec.getReadName()));
}
@ -98,7 +98,7 @@ public class PrintReadsWalkerTest extends BaseTest {
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
SAMRecord ret = walker.map(bases, null);
SAMRecord ret = walker.map(bases, null,null);
walker.reduce(ret,writer);
assertTrue(writer.getRecords().size() == 1);