diff --git a/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java b/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java index 646d055d6..dcbd9a82f 100644 --- a/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/AlignmentValidationWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.alignment; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.StingException; @@ -58,7 +59,7 @@ public class AlignmentValidationWalker extends ReadWalker { * @return Number of reads aligned by this map (aka 1). */ @Override - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { //logger.info(String.format("examining read %s", read.getReadName())); byte[] bases = read.getReadBases(); diff --git a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java index ee22a2e20..214b34d38 100644 --- a/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/AlignmentWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.alignment; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; @@ -83,7 +84,7 @@ public class AlignmentWalker extends ReadWalker { * @return Number of alignments found for this read. 
*/ @Override - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { SAMRecord alignedRead = aligner.align(read,header); if (outputBam != null) { outputBam.addAlignment(alignedRead); diff --git a/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java b/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java index fd87e710e..8e8dc617d 100644 --- a/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java +++ b/java/src/org/broadinstitute/sting/alignment/CountBestAlignmentsWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.alignment; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; @@ -49,7 +50,7 @@ public class CountBestAlignmentsWalker extends ReadWalker { * @return Number of alignments found for this read. */ @Override - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { Iterator alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator(); if(alignmentIterator.hasNext()) { int numAlignments = alignmentIterator.next().length; diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java new file mode 100644 index 000000000..c4c9ef38b --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2010. 
The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.datasources.providers; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.*; + +/** a ROD view for reads. 
This provides the Read traversals a way of getting a ReadMetaDataTracker */ +public class ReadBasedReferenceOrderedView implements View { + private final WindowedData window; + + public ReadBasedReferenceOrderedView(ShardDataProvider provider) { + window = new WindowedData(provider); + provider.register(this); + } + + /** + * for testing only please + * + * @param data the window provider + */ + ReadBasedReferenceOrderedView(WindowedData data) { + window = data; + } + + public ReadMetaDataTracker getReferenceOrderedDataForRead(SAMRecord read) { + return window.getTracker(read); + } + + public Collection> getConflictingViews() { + List> classes = new ArrayList>(); + classes.add(ManagingReferenceOrderedView.class); + return classes; + } + + public void close() { + if (window != null) window.close(); + } +} + + +/** stores a window of data, dropping RODs if we've passed the new read's start point. */ +class WindowedData { + // the queue of possibly in-frame RODs, keyed by ROD start position; RODs are removed as soon as they are out of scope + private final TreeMap> mapping = new TreeMap>(); + + // our current location from the last read we processed + private GenomeLoc currentLoc; + + // a list of the RMDDataState (data source + iterator pairs); loaded lazily on the first read + private List states; + + // the provider; where we get all our information + private final ShardDataProvider provider; + + /** + * create a WindowedData given a shard provider + * + * @param provider the ShardDataProvider + */ + public WindowedData(ShardDataProvider provider) { + this.provider = provider; + } + + /** + * load the states dynamically, since the only way to get a genome loc is from the read (the shard doesn't have one) + * + * @param provider the ShardDataProvider + * @param rec the current read + */ + private void getStates(ShardDataProvider provider, SAMRecord rec) { + states = new ArrayList(); + if (provider != null && provider.getReferenceOrderedData() != null) + for (ReferenceOrderedDataSource dataSource : 
provider.getReferenceOrderedData()) + states.add(new RMDDataState(dataSource, ((FlashBackIterator) dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart()))))); + } + + /** + * this function is for testing only + * + * @param states a list of RMDDataState to initialize with + */ + WindowedData(List states) { + this.states = states; + provider = null; + } + + /** + * create a ReadMetaDataTracker given the current read; this first advances the window to the read + * + * @param rec the read + * + * @return a ReadMetaDataTracker for the read, from which you can get ROD -> read alignments; it is handed this window's live mapping, not a copy + */ + public ReadMetaDataTracker getTracker(SAMRecord rec) { + updatePosition(rec); + return new ReadMetaDataTracker(rec, mapping); + } + + /** + * update the position we're storing and refresh the window: drop entries keyed before the read's start, then pull in the RODs that overlap the read + * + * @param rec the read to use for start and end + */ + private void updatePosition(SAMRecord rec) { + if (states == null) getStates(this.provider, rec); + currentLoc = GenomeLocParser.createGenomeLoc(rec); + + // flush the queue looking for records we've passed over; only start positions are compared - NOTE(review): contig is not checked here, confirm reads never change contig within one window + while (mapping.size() > 0 && mapping.firstKey() < currentLoc.getStart()) + mapping.pollFirstEntry(); // toss away records that we've passed + + // add new data to the queue + for (RMDDataState state : states) { + // move into position: skip ROD records that lie entirely before the current read + while (state.iterator.hasNext() && state.iterator.peekNextLocation().isBefore(currentLoc)) + state.iterator.next(); + while (state.iterator.hasNext() && state.iterator.peekNextLocation().overlapsP(currentLoc)) { + RODRecordList list = state.iterator.next(); + for (ReferenceOrderedDatum datum : list) { + if (!mapping.containsKey(list.getLocation().getStart())) + mapping.put(list.getLocation().getStart(), new HashSet()); + mapping.get(list.getLocation().getStart()).add(datum); + } + } + } + } + + /** Closes the current view. 
*/ + public void close() { + if (states == null) return; + for (RMDDataState state : states) + if (state.iterator instanceof FlashBackIterator) state.dataSource.close((FlashBackIterator) state.iterator); + + // Clear out the existing data so that post-close() accesses to this data will fail-fast. + states = null; + } + + +} + +/** Models the traversal state of a given ROD lane. */ +class RMDDataState { + public final ReferenceOrderedDataSource dataSource; + public final LocationAwareSeekableRODIterator iterator; + + public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) { + this.dataSource = dataSource; + this.iterator = iterator; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java index 5c4842182..f23e59dac 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/ReferenceOrderedDataSource.java @@ -72,6 +72,20 @@ public class ReferenceOrderedDataSource implements SimpleDataSource { return RODIterator; } + /** + * Seek to the specified position and return an iterator through the data. + * + * @param loc GenomeLoc that points to the selected position. + * + * @return Iterator through the data. + */ + public Iterator seek(GenomeLoc loc) { + DataStreamSegment dataStreamSegment = new MappedStreamSegment(loc); + FlashBackIterator RODIterator = iteratorPool.iterator(dataStreamSegment); + return RODIterator; + } + + /** * Close the specified iterator, returning it to the pool. * @param iterator Iterator to close. @@ -116,10 +130,10 @@ class ReferenceOrderedDataPool extends ResourcePool + * Class PeekableRODIterator + *

+ * the methods attached to a peekable ROD iterator + */ +public interface PeekableRODIterator extends Iterator> { + public GenomeLoc peekNextLocation(); + + public RODRecordList seekForward(GenomeLoc interval); +} + diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java new file mode 100644 index 000000000..b2f57cfc6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.*; + + +/** + * @author aaron + *

+ * Class ReadMetaDataTracker + *

+ * a read-based meta data tracker + */ +public class ReadMetaDataTracker { + private final SAMRecord record; + private final TreeMap> mapping; + + /** + * create a read meta data tracker, given the read and a queue of RODatum positions + * + * @param record the read to create offset from + * @param mapping the mapping of reference ordered datum; stored by reference, not copied + */ + public ReadMetaDataTracker(SAMRecord record, TreeMap> mapping) { + this.record = record; + this.mapping = mapping; + } + + /** + * create an alignment of read offset (key minus the start of the read's GenomeLoc; the CIGAR is not consulted) to reference ordered datum + * + * @param record the SAMRecord + * @param queue the queue (as a tree map keyed by position) + * @param cl the class name, null if not filtered by classname (currently unused by this method) + * @param name the datum track name, null if not filtered by name (currently unused by this method) + * + * @return a mapping from the position in the read to the reference ordered datum + */ + private Map> createReadAlignment(SAMRecord record, TreeMap> queue, Class cl, String name) { + Map> ret = new LinkedHashMap>(); + GenomeLoc location = GenomeLocParser.createGenomeLoc(record); + int length = record.getReadLength(); + for (Long loc : queue.keySet()) { + //if (location.containsP(loc)) { + long position = loc - location.getStart(); + if (position >= 0 && position < length) ret.put((int)(position),queue.get(loc)); + } + return ret; + + } + + /** + * create an alignment of genomic position to reference ordered datum, keeping only keys within the read's alignment start..end (inclusive); cl and name are currently unused + * + * @return a mapping from the genomic position to the reference ordered datum + */ + private Map> createGenomeLocAlignment(SAMRecord record, TreeMap> mapping, Class cl, String name) { + Map> ret = new LinkedHashMap>(); + int start = record.getAlignmentStart(); + int stop = record.getAlignmentEnd(); + for (Long location : mapping.keySet()) { + if (location >= start && location <= stop) ret.put(location,mapping.get(location)); + } + return ret; + } + + /** + * get the position mapping, from read offset to ROD + * + * @return a mapping of read offset to ROD(s) + */ + public Map> getPositionMapping() { + 
return createReadAlignment(record, mapping, null, null); + } + + /** + * get the genome location mapping, from genomic position to ROD + * + * @return a mapping of genome loc position to ROD(s) + */ + public Map> getGenomeLocMapping() { + return createGenomeLocAlignment(record, mapping, null, null); + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java index 1491f9e3a..85a03a73d 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/FlashBackIterator.java @@ -1,3 +1,26 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.broadinstitute.sting.gatk.refdata.utils; import org.broadinstitute.sting.utils.GenomeLoc; diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java index ca1b2631c..15de4789f 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/RODRecordList.java @@ -1,3 +1,26 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + package org.broadinstitute.sting.gatk.refdata.utils; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java index 24c80a018..9be830b34 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java @@ -3,9 +3,11 @@ package org.broadinstitute.sting.gatk.traversals; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.WalkerManager; +import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView; import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView; import org.broadinstitute.sting.gatk.datasources.providers.ReadView; import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; @@ -80,6 +82,9 @@ public class TraverseReads extends TraversalEngine { ReadView reads = new ReadView(dataProvider); ReadReferenceView reference = new ReadReferenceView(dataProvider); + // get the reference ordered data + ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider); + // while we still have more reads for (SAMRecord read : reads) { // an array of characters that represent the reference @@ -92,9 +97,12 @@ public class TraverseReads extends TraversalEngine { // update the number of reads we've seen TraversalStatistics.nRecords++; + // if the read is mapped, create a metadata tracker + ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? 
rodView.getReferenceOrderedDataForRead(read) : null; + final boolean keepMeP = readWalker.filter(refSeq, read); if (keepMeP) { - M x = readWalker.map(refSeq, read); + M x = readWalker.map(refSeq, read, tracker); // the tracker can be null sum = readWalker.reduce(x, sum); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java index d11ae8d0c..87bee035d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/ClipReadsWalker.java @@ -4,6 +4,7 @@ import net.sf.samtools.*; import net.sf.picard.reference.ReferenceSequenceFileFactory; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequence; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Pair; @@ -141,7 +142,7 @@ public class ClipReadsWalker extends ReadWalker { public SAMFileHeader.SortOrder SORT_ORDER=SAMFileHeader.SortOrder.coordinate; @Override - public SAMRecord map(char[] ref, SAMRecord read) { + public SAMRecord map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { return read; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java index caffc15d0..bf115e1e3 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/FlagStatWalker.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import java.text.DecimalFormat; import java.text.NumberFormat; @@ -112,7 +113,7 @@ public class FlagStatWalker extends ReadWalker { private FlagStat myStat = new 
FlagStat(); - public Integer map( char[] ref, SAMRecord read ) { + public Integer map( char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { myStat.readCount++; if (read.getReadFailsVendorQualityCheckFlag()) { myStat.QC_failure++; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java index 8b1fadcf1..54b50e20b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMFileWriter; import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.utils.cmdLine.Argument; /* @@ -90,7 +91,7 @@ public class PrintReadsWalker extends ReadWalker { * @param read the read itself, as a SAMRecord * @return the read itself */ - public SAMRecord map( char[] ref, SAMRecord read ) { + public SAMRecord map( char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { return read; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java index 587babb42..a09ab7df8 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; /** * Created by IntelliJ IDEA. 
@@ -23,5 +24,5 @@ public abstract class ReadWalker extends Walker { } @Override - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { if ( read.getReadUnmappedFlag() || read.getDuplicateReadFlag() || diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqWalker.java index 8ab68f532..55252c543 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.fasta; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.utils.BaseUtils; @@ -33,7 +34,7 @@ public class BamToFastqWalker extends ReadWalker { } } - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { out.println("@" + read.getReadName()); if ( !RE_REVERSE || !read.getReadNegativeStrandFlag() ) { out.println(read.getReadString()); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java index 5e4115ac8..f985c8d93 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java @@ -148,7 +148,7 @@ public class IndelGenotyperV2Walker extends ReadWalker { @Override - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { // if ( read.getReadName().equals("428EFAAXX090610:2:36:1384:639#0") ) System.out.println("GOT 
READ"); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 0451f9db9..6bcc9e173 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -226,7 +226,7 @@ public class IndelRealigner extends ReadWalker { } } - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { if ( currentInterval == null ) { emit(read); return 0; @@ -278,7 +278,7 @@ public class IndelRealigner extends ReadWalker { } while ( currentInterval != null && currentInterval.isBefore(readLoc) ); // call back into map now that the state has been updated - map(ref, read); + map(ref, read,metaDataTracker); } return 0; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java index 84a424e9c..59eec83da 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReadsWalker.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.qc; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.Requires; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; @@ -12,7 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; */ @Requires({DataSource.READS, DataSource.REFERENCE}) public class CountReadsWalker extends ReadWalker { - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker tracker) { //System.out.println(read.format()); return 1; } diff --git 
a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java index 32a5e97a1..826b78952 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/ReadValidationWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.qc; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.StingException; import net.sf.samtools.SAMRecord; @@ -76,7 +77,7 @@ public class ReadValidationWalker extends ReadWalker { * @param read the read itself, as a SAMRecord * @return the read itself */ - public SAMRecord map( char[] ref, SAMRecord read ) { + public SAMRecord map( char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { return read; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 0a5b1c71d..b599dede5 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -18,6 +18,7 @@ import net.sf.samtools.util.SequenceUtil; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Requires; @@ -314,7 +315,7 @@ public class TableRecalibrationWalker extends ReadWalker { return !read.getReadUnmappedFlag(); } - public Integer map(char[] ref, SAMRecord read) { + public 
Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { //System.out.println(read.getAttribute("NM")); int editDist = Integer.parseInt(read.getAttribute("NM").toString()); if (editDist <= 50) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java index 1725557d1..c244ace73 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DSBWalkerV3.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.*; @@ -326,7 +327,7 @@ public class DSBWalkerV3 extends ReadWalker { } - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { if ( AlignmentUtils.isReadUnmapped(read) ) return 0; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IOCrusherWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IOCrusherWalker.java index ce591919b..95085ab68 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IOCrusherWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IOCrusherWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.Utils; @@ -37,7 +38,7 @@ public class IOCrusherWalker extends ReadWalker { - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker 
metaDataTracker) { int nMismatches = 0; int start = read.getAlignmentStart()-1; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MismatchHistoWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MismatchHistoWalker.java index 7945d2ac5..6505005f2 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MismatchHistoWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/MismatchHistoWalker.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.utils.Utils; @@ -21,7 +22,7 @@ public class MismatchHistoWalker extends ReadWalker { return !read.getReadUnmappedFlag(); } - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { int editDist = Integer.parseInt(read.getAttribute("NM").toString()); diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/PairedQualityScoreCountsWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/PairedQualityScoreCountsWalker.java index edebd35e6..d44cfbb31 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/PairedQualityScoreCountsWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/PairedQualityScoreCountsWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.QualityUtils; @@ -35,7 +36,7 @@ public class PairedQualityScoreCountsWalker extends ReadWalker map( char[] ref, SAMRecord read) { + public Pair map( char[] ref, 
SAMRecord read, ReadMetaDataTracker metaDataTracker) { if ( canUseRead(read) ) { return getCorrectlyOrientedBaseQualities(read); } else { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java index 89b56887d..3313a1c46 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.QualityUtils; @@ -46,7 +47,7 @@ public class ReadErrorRateWalker extends ReadWalker protected static BufferedReader inputReader = null; protected static String line = null; - public SAMRecord map( char[] ref, SAMRecord read ) { + public SAMRecord map( char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { return read; // all the work is done in the reduce step for this walker } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReplaceQuals.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReplaceQuals.java index e36e2a8e5..c2197a3f1 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReplaceQuals.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReplaceQuals.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.Pair; @@ -64,7 +65,7 @@ public class ReplaceQuals extends ReadWalker { /** * */ - public SAMRecord map(char[] ref, SAMRecord read) { + 
public SAMRecord map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { final String name = read.getReadName(); if ( readNameToPairs.containsKey(name) ) { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java index dfd35e262..f73f2fee4 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/TestReadFishingWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.oneoffprojects.walkers; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.alignment.bwa.BWAAligner; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; @@ -106,7 +107,7 @@ public class TestReadFishingWalker extends ReadWalker { } @Override - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { Alignment bestAlignment = aligner.getBestAlignment(read.getReadBases()); System.out.println("bestAlignment = " + bestAlignment); return 1; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateAlleleLikelihoodsWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateAlleleLikelihoodsWalker.java index 29a344532..e426ca7e2 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateAlleleLikelihoodsWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CalculateAlleleLikelihoodsWalker.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import 
org.broadinstitute.sting.utils.cmdLine.Argument; @@ -76,7 +77,7 @@ public class CalculateAlleleLikelihoodsWalker extends ReadWalker { - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { int readstart = read.getAlignmentStart(); int readstop = read.getAlignmentEnd(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CreatePedFileWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CreatePedFileWalker.java index f059d8fb5..a60edc63c 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CreatePedFileWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/CreatePedFileWalker.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -177,7 +178,7 @@ public class CreatePedFileWalker extends ReadWalker { } } - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { HLAnamesAL.add(read.getReadName()); HLAreadsAL.add(formatter.FormatRead(read.getCigarString(), read.getReadString())); HLAstartposAL.add(read.getAlignmentStart()); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/FindClosestAlleleWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/FindClosestAlleleWalker.java index a2978bf80..481a3acef 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/FindClosestAlleleWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/FindClosestAlleleWalker.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; import 
net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.cmdLine.Argument; @@ -231,7 +232,7 @@ public class FindClosestAlleleWalker extends ReadWalker { return maxFreq; } - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { //Calculate concordance for this read and all overlapping reads double maxConcordance = CalculateConcordance(read); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/ImputeAllelesWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/ImputeAllelesWalker.java index 9f9eab7ed..dacacfb3c 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/ImputeAllelesWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/ImputeAllelesWalker.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.playground.gatk.walkers.HLAcaller; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.*; import java.io.FileInputStream; @@ -134,7 +135,7 @@ public class ImputeAllelesWalker extends ReadWalker { } - public Integer map(char[] ref, SAMRecord read) { + public Integer map(char[] ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { int readstart = read.getAlignmentStart(); int readstop = read.getAlignmentEnd(); int startimputation = 0, stopimputation = 0; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/GraphReferenceAssessor.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/GraphReferenceAssessor.java index 13a1fdb83..ec2db0d7c 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/GraphReferenceAssessor.java +++ 
b/java/src/org/broadinstitute/sting/playground/gatk/walkers/graphalign/GraphReferenceAssessor.java @@ -166,7 +166,7 @@ public class GraphReferenceAssessor extends ReadWalker { return minNMM; } - public Integer map(char[] refArg, SAMRecord read) { + public Integer map(char[] refArg, SAMRecord read, ReadMetaDataTracker metaDataTracker) { if ( MAXREADS-- == 0 ) { System.exit(0); diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java index 3848ac5e3..e3af57cdf 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialReadsTraversal.java @@ -112,7 +112,7 @@ public class ArtificialReadsTraversal extends TraversalEngine { final boolean keepMeP = readWalker.filter(refSeq, read); if (keepMeP) { - M x = readWalker.map(refSeq, read); + M x = readWalker.map(refSeq, read,null); // TODO: fix me at some point, it would be nice to fake out ROD data too sum = readWalker.reduce(x, sum); } diff --git a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index d1f5fe3e7..5618bcdf4 100755 --- a/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -154,6 +154,11 @@ public class ArtificialSAMUtils { elements.add(new CigarElement(length, CigarOperator.characterToEnum('M'))); record.setCigar(new Cigar(elements)); record.setProperPairFlag(false); + // TODO: add to code after checking why it breaks the clean read injector test + /*byte[] c = new byte[length]; + for (int x = 0; x < length; x++) + c[x] = 'A'; */ + //record.setReadBases(c); if (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { record.setReadUmappedFlag(true); } @@ -247,4 +252,3 @@ public class ArtificialSAMUtils { return new 
ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header); } } - diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewTest.java new file mode 100644 index 000000000..a74beb7ff --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewTest.java @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.datasources.providers; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTrackerTest; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; +import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.*; + + +/** + * + * @author aaron + * + * Class ReadBasedReferenceOrderedViewTest + * + * test out the ReadBasedReferenceOrderedView class + */ +public class ReadBasedReferenceOrderedViewTest extends BaseTest { + + private static int startingChr = 1; + private static int endingChr = 2; + private static int readCount = 100; + private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH; + private static SAMFileHeader header; + + @BeforeClass + public static void beforeClass() { + header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + } + + @Before + public void beforeEach() { + } + + @Test + public void testCreateReadMetaDataTrackerOnePerSite() { + // make ten reads, + List records = new ArrayList(); + for (int x = 1; x < 11; x++) { + SAMRecord rec = ArtificialSAMUtils.createArtificialRead(header, "name", 0, x, 10); + byte[] c = new byte[10]; + for (int y = 0; y < 10; y++) + c[y] = 'A'; + rec.setReadBases(c); + records.add(rec); + } + GenomeLoc start = 
GenomeLocParser.createGenomeLoc(0,0,0); + List list = new ArrayList(); + list.add(new RMDDataState(null, new FakePeekingRODIterator(start))); + ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(list)); + + for (SAMRecord rec : records) { + ReadMetaDataTracker tracker = view.getReferenceOrderedDataForRead(rec); + Map> map = tracker.getPositionMapping(); + for (Integer i : map.keySet()) { + Assert.assertEquals(1,map.get(i).size()); + } + Assert.assertEquals(10,map.keySet().size()); + } + + } + +} + + +class FakePeekingRODIterator implements LocationAwareSeekableRODIterator { + + // current location + private GenomeLoc location; + private ReadMetaDataTrackerTest.FakeRODatum curROD; + + public FakePeekingRODIterator(GenomeLoc startingLoc) { + this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(),startingLoc.getStart()+1,startingLoc.getStop()+1);; + } + + @Override + public GenomeLoc peekNextLocation() { + System.err.println("Peek Next -> " + location); + return location; + } + + @Override + public GenomeLoc position() { + return location; + } + + @Override + public RODRecordList seekForward(GenomeLoc interval) { + while (location.isBefore(interval)) + next(); + return next(); // we always move by one, we know the next location will be right + } + + @Override + public boolean hasNext() { + return true; // we always have next + } + + @Override + public RODRecordList next() { + System.err.println("Next -> " + location); + curROD = new ReadMetaDataTrackerTest.FakeRODatum(location); + location = GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart()+1,location.getStop()+1); + FakeRODRecordList list = new FakeRODRecordList(); + list.add(curROD); + return list; + } + + @Override + public void remove() { + throw new IllegalStateException("GRRR"); + } +} + +class FakeRODRecordList extends AbstractList implements RODRecordList { + private final List list = new ArrayList(); + + public boolean 
add(ReferenceOrderedDatum data) { + return list.add(data); + } + + @Override + public ReferenceOrderedDatum get(int i) { + return list.get(i); + } + + @Override + public int size() { + return list.size(); + } + + @Override + public GenomeLoc getLocation() { + return list.get(0).getLocation(); + } + + @Override + public String getName() { + return "test"; + } + + @Override + public int compareTo(RODRecordList rodRecordList) { + return this.list.get(0).getLocation().compareTo(rodRecordList.getLocation()); + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerTest.java new file mode 100644 index 000000000..e84d66a9a --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerTest.java @@ -0,0 +1,189 @@ +/* + * Copyright (c) 2010. The Broad Institute + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.refdata; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.HashSet; +import java.util.PriorityQueue; +import java.util.Set; +import java.util.TreeMap; + + +/** + * @author aaron + *

+ * Class ReadMetaDataTrackerTest + *

+ * test out the ReadMetaDataTracker + */ +public class ReadMetaDataTrackerTest extends BaseTest { + private static int startingChr = 1; + private static int endingChr = 2; + private static int readCount = 100; + private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH; + private static SAMFileHeader header; + + @BeforeClass + public static void beforeClass() { + header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH); + GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); + } + + @Before + public void beforeEach() { + } + + @Test + public void rodAtEachReadBase() { + ReadMetaDataTracker tracker = getRMDT(1); + + // count the positions + int count = 0; + for (int x : tracker.getPositionMapping().keySet()) { + count++; + Assert.assertEquals(1, tracker.getPositionMapping().get(x).size()); + } + Assert.assertEquals(10, count); + } + + @Test + public void sparceRODsForRead() { + ReadMetaDataTracker tracker = getRMDT(7); + + // count the positions + int count = 0; + for (int x : tracker.getPositionMapping().keySet()) { + count++; + Assert.assertEquals(1, tracker.getPositionMapping().get(x).size()); + } + Assert.assertEquals(2, count); + } + + @Test + public void rodByGenomeLoc() { + ReadMetaDataTracker tracker = getRMDT(1); + + // count the positions + int count = 0; + for (Long x : tracker.getGenomeLocMapping().keySet()) { + count++; + Assert.assertEquals(1, tracker.getGenomeLocMapping().get(x).size()); + } + Assert.assertEquals(10, count); + } + + private ReadMetaDataTracker getRMDT(int incr) { + SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10); + byte[] c = new byte[10]; + for (int x = 0; x < 10; x++) + c[x] = 'A'; + record.setReadBases(c); + TreeMap> data = new TreeMap>(); + for (int x = 0; x < record.getAlignmentEnd(); x+=incr) { + GenomeLoc loc = GenomeLocParser.createGenomeLoc(record.getReferenceIndex(), 
record.getAlignmentStart() + x, record.getAlignmentStart() + x); + Set set = new HashSet(); + set.add(new FakeRODatum(loc)); + data.put((long)record.getAlignmentStart() + x,set); + } + ReadMetaDataTracker tracker = new ReadMetaDataTracker(record, data); + return tracker; + } + + + /** for testing only */ + static public class FakeRODatum implements ReferenceOrderedDatum { + + final GenomeLoc location; + + public FakeRODatum(GenomeLoc location) { + this.location = location; + } + + @Override + public String getName() { + return "false"; + } + + @Override + public boolean parseLine(Object header, String[] parts) throws IOException { + return false; + } + + @Override + public String toSimpleString() { + return ""; + } + + @Override + public String repl() { + return ""; + } + + /** + * Used by the ROD system to determine how to split input lines + * + * @return Regex string delimiter separating fields + */ + @Override + public String delimiterRegex() { + return ""; + } + + @Override + public GenomeLoc getLocation() { + return location; + } + + @Override + public int compareTo(ReferenceOrderedDatum that) { + return location.compareTo(that.getLocation()); + } + + /** + * Backdoor hook to read header, meta-data, etc. associated with the file. 
Will be + * called by the ROD system before streaming starts + * + * @param source source data file on disk from which this rod stream will be pulled + * + * @return a header object that will be passed to parseLine command + */ + @Override + public Object initialize(File source) throws FileNotFoundException { + return null; + } + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java index b74701c46..a07e4a229 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerTest.java @@ -76,7 +76,7 @@ public class PrintReadsWalkerTest extends BaseTest { public void testNullRead() { PrintReadsWalker walker = new PrintReadsWalker(); - SAMRecord rec = walker.map(bases, null); + SAMRecord rec = walker.map(bases, null, null); assertTrue(rec == null); } @@ -86,7 +86,7 @@ public class PrintReadsWalkerTest extends BaseTest { PrintReadsWalker walker = new PrintReadsWalker(); SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000); SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); - SAMRecord ret = walker.map(bases, rec); + SAMRecord ret = walker.map(bases, rec,null); assertTrue(ret == rec); assertTrue(ret.getReadName().equals(rec.getReadName())); } @@ -98,7 +98,7 @@ public class PrintReadsWalkerTest extends BaseTest { SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000); SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50); ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter(); - SAMRecord ret = walker.map(bases, null); + SAMRecord ret = walker.map(bases, null,null); walker.reduce(ret,writer); assertTrue(writer.getRecords().size() == 1);