Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
07f9d14d9f
|
|
@ -0,0 +1,247 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package net.sf.picard.sam;
|
||||
|
||||
import net.sf.picard.PicardException;
|
||||
|
||||
import java.util.*;
|
||||
import java.lang.reflect.Constructor;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
/**
|
||||
* Provides an iterator interface for merging multiple underlying iterators into a single
|
||||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||
* the requested output format is unsorted, in which case any combination is valid.
|
||||
*/
|
||||
public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
|
||||
private final PriorityQueue<ComparableSamRecordIterator> pq;
|
||||
private final SamFileHeaderMerger samHeaderMerger;
|
||||
private final Collection<SAMFileReader> readers;
|
||||
private final SAMFileHeader.SortOrder sortOrder;
|
||||
private final SAMRecordComparator comparator;
|
||||
|
||||
private boolean initialized = false;
|
||||
private boolean iterationStarted = false;
|
||||
|
||||
/**
|
||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||
* provided by the header merger parameter.
|
||||
* @param headerMerger The merged header and contents of readers.
|
||||
* @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order.
|
||||
* @deprecated replaced by (SamFileHeaderMerger, Collection<SAMFileReader>, boolean)
|
||||
*/
|
||||
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) {
|
||||
this(headerMerger, headerMerger.getReaders(), forcePresorted);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||
* provided by the header merger parameter.
|
||||
* @param headerMerger The merged header and contents of readers.
|
||||
* @param assumeSorted false ensures that the iterator checks the headers of the readers for appropriate sort order.
|
||||
*/
|
||||
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, Collection<SAMFileReader> readers, final boolean assumeSorted) {
|
||||
this.samHeaderMerger = headerMerger;
|
||||
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
|
||||
this.comparator = getComparator();
|
||||
this.readers = readers;
|
||||
|
||||
this.pq = new PriorityQueue<ComparableSamRecordIterator>(readers.size());
|
||||
|
||||
for (final SAMFileReader reader : readers) {
|
||||
if (!assumeSorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted &&
|
||||
reader.getFileHeader().getSortOrder() != this.sortOrder){
|
||||
throw new PicardException("Files are not compatible with sort order");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a given SAM file iterator to the merging iterator. Use this to restrict the merged iteration to a given genomic interval,
|
||||
* rather than iterating over every read in the backing file or stream.
|
||||
* @param reader Reader to add to the merging iterator.
|
||||
* @param iterator Iterator traversing over reader contents.
|
||||
*/
|
||||
public void addIterator(final SAMFileReader reader, final CloseableIterator<SAMRecord> iterator) {
|
||||
if(iterationStarted)
|
||||
throw new PicardException("Cannot add another iterator; iteration has already begun");
|
||||
if(!samHeaderMerger.containsHeader(reader.getFileHeader()))
|
||||
throw new PicardException("All iterators to be merged must be accounted for in the SAM header merger");
|
||||
final ComparableSamRecordIterator comparableIterator = new ComparableSamRecordIterator(reader,iterator,comparator);
|
||||
addIfNotEmpty(comparableIterator);
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
private void startIterationIfRequired() {
|
||||
if(initialized)
|
||||
return;
|
||||
for(SAMFileReader reader: readers)
|
||||
addIterator(reader,reader.iterator());
|
||||
iterationStarted = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close down all open iterators.
|
||||
*/
|
||||
public void close() {
|
||||
// Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue.
|
||||
for(CloseableIterator<SAMRecord> iterator: pq)
|
||||
iterator.close();
|
||||
}
|
||||
|
||||
/** Returns true if any of the underlying iterators has more records, otherwise false. */
|
||||
public boolean hasNext() {
|
||||
startIterationIfRequired();
|
||||
return !this.pq.isEmpty();
|
||||
}
|
||||
|
||||
/** Returns the next record from the top most iterator during merging. */
|
||||
public SAMRecord next() {
|
||||
startIterationIfRequired();
|
||||
|
||||
final ComparableSamRecordIterator iterator = this.pq.poll();
|
||||
final SAMRecord record = iterator.next();
|
||||
addIfNotEmpty(iterator);
|
||||
record.setHeader(this.samHeaderMerger.getMergedHeader());
|
||||
|
||||
// Fix the read group if needs be
|
||||
if (this.samHeaderMerger.hasReadGroupCollisions()) {
|
||||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID);
|
||||
if (oldGroupId != null ) {
|
||||
final String newGroupId = this.samHeaderMerger.getReadGroupId(iterator.getReader().getFileHeader(),oldGroupId);
|
||||
record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId);
|
||||
}
|
||||
}
|
||||
|
||||
// Fix the program group if needs be
|
||||
if (this.samHeaderMerger.hasProgramGroupCollisions()) {
|
||||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID);
|
||||
if (oldGroupId != null ) {
|
||||
final String newGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader().getFileHeader(),oldGroupId);
|
||||
record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId);
|
||||
}
|
||||
}
|
||||
|
||||
// Fix up the sequence indexes if needs be
|
||||
if (this.samHeaderMerger.hasMergedSequenceDictionary()) {
|
||||
if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getReferenceIndex()));
|
||||
}
|
||||
|
||||
if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getMateReferenceIndex()));
|
||||
}
|
||||
}
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds iterator to priority queue. If the iterator has more records it is added
|
||||
* otherwise it is closed and not added.
|
||||
*/
|
||||
private void addIfNotEmpty(final ComparableSamRecordIterator iterator) {
|
||||
if (iterator.hasNext()) {
|
||||
pq.offer(iterator);
|
||||
}
|
||||
else {
|
||||
iterator.close();
|
||||
}
|
||||
}
|
||||
|
||||
/** Unsupported operation. */
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the right comparator for a given sort order (coordinate, alphabetic). In the
|
||||
* case of "unsorted" it will return a comparator that gives an arbitrary but reflexive
|
||||
* ordering.
|
||||
*/
|
||||
private SAMRecordComparator getComparator() {
|
||||
// For unsorted build a fake comparator that compares based on object ID
|
||||
if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
|
||||
return new SAMRecordComparator() {
|
||||
public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||
return System.identityHashCode(lhs) - System.identityHashCode(rhs);
|
||||
}
|
||||
|
||||
public int compare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||
return fileOrderCompare(lhs, rhs);
|
||||
}
|
||||
};
|
||||
}
|
||||
if (samHeaderMerger.hasMergedSequenceDictionary() && sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) {
|
||||
return new MergedSequenceDictionaryCoordinateOrderComparator();
|
||||
}
|
||||
|
||||
// Otherwise try and figure out what kind of comparator to return and build it
|
||||
return this.sortOrder.getComparatorInstance();
|
||||
}
|
||||
|
||||
/** Returns the merged header that the merging iterator is working from. */
|
||||
public SAMFileHeader getMergedHeader() {
|
||||
return this.samHeaderMerger.getMergedHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Ugh. Basically does a regular coordinate compare, but looks up the sequence indices in the merged
|
||||
* sequence dictionary. I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids
|
||||
* more copy & paste.
|
||||
*/
|
||||
private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator {
|
||||
|
||||
public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
|
||||
final int referenceIndex1 = getReferenceIndex(samRecord1);
|
||||
final int referenceIndex2 = getReferenceIndex(samRecord2);
|
||||
if (referenceIndex1 != referenceIndex2) {
|
||||
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return 1;
|
||||
} else if (referenceIndex2 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return -1;
|
||||
} else {
|
||||
return referenceIndex1 - referenceIndex2;
|
||||
}
|
||||
}
|
||||
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
// Both are unmapped.
|
||||
return 0;
|
||||
}
|
||||
return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart();
|
||||
}
|
||||
|
||||
private int getReferenceIndex(final SAMRecord samRecord) {
|
||||
if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getReferenceIndex());
|
||||
}
|
||||
if (samRecord.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getMateReferenceIndex());
|
||||
}
|
||||
return SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,744 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package net.sf.picard.sam;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.picard.PicardException;
|
||||
import net.sf.samtools.AbstractSAMHeaderRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMProgramRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
|
||||
/**
|
||||
* Merges SAMFileHeaders that have the same sequences into a single merged header
|
||||
* object while providing read group translation for cases where read groups
|
||||
* clash across input headers.
|
||||
*/
|
||||
public class SamFileHeaderMerger {
|
||||
//Super Header to construct
|
||||
private final SAMFileHeader mergedHeader;
|
||||
private Collection<SAMFileReader> readers;
|
||||
private final Collection<SAMFileHeader> headers;
|
||||
|
||||
//Translation of old group ids to new group ids
|
||||
private final Map<SAMFileHeader, Map<String, String>> samReadGroupIdTranslation =
|
||||
new IdentityHashMap<SAMFileHeader, Map<String, String>>();
|
||||
|
||||
//the read groups from different files use the same group ids
|
||||
private boolean hasReadGroupCollisions = false;
|
||||
|
||||
//the program records from different files use the same program record ids
|
||||
private boolean hasProgramGroupCollisions = false;
|
||||
|
||||
//Translation of old program group ids to new program group ids
|
||||
private Map<SAMFileHeader, Map<String, String>> samProgramGroupIdTranslation =
|
||||
new IdentityHashMap<SAMFileHeader, Map<String, String>>();
|
||||
|
||||
private boolean hasMergedSequenceDictionary = false;
|
||||
|
||||
// Translation of old sequence dictionary ids to new dictionary ids
|
||||
// This is an IdentityHashMap because it can be quite expensive to compute the hashCode for
|
||||
// large SAMFileHeaders. It is possible that two input files will have identical headers so that
|
||||
// the regular HashMap would fold them together, but the value stored in each of the two
|
||||
// Map entries will be the same, so it should not hurt anything.
|
||||
private final Map<SAMFileHeader, Map<Integer, Integer>> samSeqDictionaryIdTranslationViaHeader =
|
||||
new IdentityHashMap<SAMFileHeader, Map<Integer, Integer>>();
|
||||
|
||||
//HeaderRecordFactory that creates SAMReadGroupRecord instances.
|
||||
private static final HeaderRecordFactory<SAMReadGroupRecord> READ_GROUP_RECORD_FACTORY = new HeaderRecordFactory<SAMReadGroupRecord>() {
|
||||
public SAMReadGroupRecord createRecord(String id, SAMReadGroupRecord srcReadGroupRecord) {
|
||||
return new SAMReadGroupRecord(id, srcReadGroupRecord);
|
||||
}
|
||||
};
|
||||
|
||||
//HeaderRecordFactory that creates SAMProgramRecord instances.
|
||||
private static final HeaderRecordFactory<SAMProgramRecord> PROGRAM_RECORD_FACTORY = new HeaderRecordFactory<SAMProgramRecord>() {
|
||||
public SAMProgramRecord createRecord(String id, SAMProgramRecord srcProgramRecord) {
|
||||
return new SAMProgramRecord(id, srcProgramRecord);
|
||||
}
|
||||
};
|
||||
|
||||
//comparator used to sort lists of program group and read group records
|
||||
private static final Comparator<AbstractSAMHeaderRecord> RECORD_ID_COMPARATOR = new Comparator<AbstractSAMHeaderRecord>() {
|
||||
public int compare(AbstractSAMHeaderRecord o1, AbstractSAMHeaderRecord o2) {
|
||||
return o1.getId().compareTo(o2.getId());
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Create SAMFileHeader with additional information.  Required that sequence dictionaries agree.
 *
 * @param readers sam file readers to combine
 * @param sortOrder sort order new header should have
 * @deprecated replaced by {@link #SamFileHeaderMerger(SAMFileHeader.SortOrder, Collection, boolean)}
 */
@Deprecated
public SamFileHeaderMerger(final Collection<SAMFileReader> readers, final SAMFileHeader.SortOrder sortOrder) {
    // Delegate to the three-argument form; differing sequence dictionaries are not merged.
    this(readers, sortOrder, false);
}
|
||||
|
||||
/**
 * Create SAMFileHeader with additional information.
 *
 * @param readers sam file readers to combine
 * @param sortOrder sort order new header should have
 * @param mergeDictionaries If true, merge sequence dictionaries in new header.  If false, require that
 * all input sequence dictionaries be identical.
 * @deprecated replaced by {@link #SamFileHeaderMerger(SAMFileHeader.SortOrder, Collection, boolean)}
 */
@Deprecated
public SamFileHeaderMerger(final Collection<SAMFileReader> readers, final SAMFileHeader.SortOrder sortOrder, final boolean mergeDictionaries) {
    // Delegate to the preferred header-based constructor, then remember the
    // readers so that reader-based callers (e.g. getReaders()) keep working.
    this(sortOrder, getHeadersFromReaders(readers), mergeDictionaries);
    this.readers = readers;
}
|
||||
|
||||
/**
 * Create SAMFileHeader with additional information.  This is the preferred constructor.
 *
 * @param sortOrder sort order new header should have
 * @param headers sam file headers to combine
 * @param mergeDictionaries If true, merge sequence dictionaries in new header.  If false, require that
 * all input sequence dictionaries be identical.
 */
public SamFileHeaderMerger(final SAMFileHeader.SortOrder sortOrder, final Collection<SAMFileHeader> headers, final boolean mergeDictionaries) {
    this.headers = headers;
    this.mergedHeader = new SAMFileHeader();

    // Resolve the sequence dictionary: identical dictionaries are used as-is;
    // differing dictionaries are merged only when the caller opted in.
    SAMSequenceDictionary resolvedDictionary;
    try {
        resolvedDictionary = getSequenceDictionary(headers);
        this.hasMergedSequenceDictionary = false;
    }
    catch (final SequenceUtil.SequenceListsDifferException e) {
        if (!mergeDictionaries) {
            throw e;
        }
        resolvedDictionary = mergeSequenceDictionaries(headers);
        this.hasMergedSequenceDictionary = true;
    }
    this.mergedHeader.setSequenceDictionary(resolvedDictionary);

    // Combine the program records from every input header.
    for (final SAMProgramRecord programRecord : mergeProgramGroups(headers)) {
        this.mergedHeader.addProgramRecord(programRecord);
    }

    // Combine the read groups from every input header.
    this.mergedHeader.setReadGroups(mergeReadGroups(headers));
    this.mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.none);

    this.mergedHeader.setSortOrder(sortOrder);

    // Carry over every comment line from every input header.
    for (final SAMFileHeader inputHeader : headers) {
        for (final String comment : inputHeader.getComments()) {
            this.mergedHeader.addComment(comment);
        }
    }
}
|
||||
|
||||
// Utilility method to make use with old constructor
|
||||
private static List<SAMFileHeader> getHeadersFromReaders(Collection<SAMFileReader> readers) {
|
||||
List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
|
||||
for (SAMFileReader reader : readers) {
|
||||
headers.add(reader.getFileHeader());
|
||||
}
|
||||
return headers;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks to see if there are clashes where different readers are using the same read
|
||||
* group IDs. If yes, then those IDs that collided are remapped.
|
||||
*
|
||||
* @param headers headers to combine
|
||||
* @return new list of read groups constructed from all the readers
|
||||
*/
|
||||
private List<SAMReadGroupRecord> mergeReadGroups(final Collection<SAMFileHeader> headers) {
|
||||
//prepare args for mergeHeaderRecords(..) call
|
||||
final HashSet<String> idsThatAreAlreadyTaken = new HashSet<String>();
|
||||
|
||||
final List<HeaderRecordAndFileHeader<SAMReadGroupRecord>> readGroupsToProcess = new LinkedList<HeaderRecordAndFileHeader<SAMReadGroupRecord>>();
|
||||
for (final SAMFileHeader header : headers) {
|
||||
for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
|
||||
//verify that there are no existing id collisions in this input file
|
||||
if(!idsThatAreAlreadyTaken.add(readGroup.getId()))
|
||||
throw new PicardException("Input file: " + header + " contains more than one RG with the same id (" + readGroup.getId() + ")");
|
||||
|
||||
readGroupsToProcess.add(new HeaderRecordAndFileHeader<SAMReadGroupRecord>(readGroup, header));
|
||||
}
|
||||
idsThatAreAlreadyTaken.clear();
|
||||
}
|
||||
|
||||
final List<SAMReadGroupRecord> result = new LinkedList<SAMReadGroupRecord>();
|
||||
|
||||
hasReadGroupCollisions = mergeHeaderRecords(readGroupsToProcess, READ_GROUP_RECORD_FACTORY, idsThatAreAlreadyTaken, samReadGroupIdTranslation, result);
|
||||
|
||||
//sort the result list by record id
|
||||
Collections.sort(result, RECORD_ID_COMPARATOR);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks to see if there are clashes where different readers are using the same program
|
||||
* group IDs. If yes, then those IDs that collided are remapped.
|
||||
*
|
||||
* @param headers headers to combine
|
||||
* @return new list of program groups constructed from all the readers
|
||||
*/
|
||||
private List<SAMProgramRecord> mergeProgramGroups(final Collection<SAMFileHeader> headers) {
|
||||
|
||||
final List<SAMProgramRecord> overallResult = new LinkedList<SAMProgramRecord>();
|
||||
|
||||
//this Set will accumulate all SAMProgramRecord ids that have been encountered so far.
|
||||
final HashSet<String> idsThatAreAlreadyTaken = new HashSet<String>();
|
||||
|
||||
//need to process all program groups
|
||||
List<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsLeftToProcess = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
|
||||
for (final SAMFileHeader header : headers) {
|
||||
for (final SAMProgramRecord programGroup : header.getProgramRecords()) {
|
||||
//verify that there are no existing id collisions in this input file
|
||||
if(!idsThatAreAlreadyTaken.add(programGroup.getId()))
|
||||
throw new PicardException("Input file: " + header + " contains more than one PG with the same id (" + programGroup.getId() + ")");
|
||||
|
||||
programGroupsLeftToProcess.add(new HeaderRecordAndFileHeader<SAMProgramRecord>(programGroup, header));
|
||||
}
|
||||
idsThatAreAlreadyTaken.clear();
|
||||
}
|
||||
|
||||
//A program group header (lets say ID=2 PN=B PP=1) may have a PP (previous program) attribute which chains it to
|
||||
//another program group header (lets say ID=1 PN=A) to indicate that the given file was
|
||||
//processed by program A followed by program B. These PP attributes potentially
|
||||
//connect headers into one or more tree structures. Merging is done by
|
||||
//first merging all headers that don't have PP attributes (eg. tree roots),
|
||||
//then updating and merging all headers whose PPs point to the tree-root headers,
|
||||
//and so on until all program group headers are processed.
|
||||
|
||||
//currentProgramGroups is the list of records to merge next. Start by merging the programGroups that don't have a PP attribute (eg. the tree roots).
|
||||
List< HeaderRecordAndFileHeader<SAMProgramRecord> > currentProgramGroups = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
|
||||
for(final Iterator<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
|
||||
final HeaderRecordAndFileHeader<SAMProgramRecord> pair = programGroupsLeftToProcessIterator.next();
|
||||
if(pair.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG) == null) {
|
||||
programGroupsLeftToProcessIterator.remove();
|
||||
currentProgramGroups.add(pair);
|
||||
}
|
||||
}
|
||||
|
||||
//merge currentProgramGroups
|
||||
while(!currentProgramGroups.isEmpty())
|
||||
{
|
||||
final List<SAMProgramRecord> currentResult = new LinkedList<SAMProgramRecord>();
|
||||
|
||||
hasProgramGroupCollisions |= mergeHeaderRecords(currentProgramGroups, PROGRAM_RECORD_FACTORY, idsThatAreAlreadyTaken, samProgramGroupIdTranslation, currentResult);
|
||||
|
||||
//add currentResults to overallResults
|
||||
overallResult.addAll(currentResult);
|
||||
|
||||
//apply the newly-computed id translations to currentProgramGroups and programGroupsLeftToProcess
|
||||
currentProgramGroups = translateIds(currentProgramGroups, samProgramGroupIdTranslation, false);
|
||||
programGroupsLeftToProcess = translateIds(programGroupsLeftToProcess, samProgramGroupIdTranslation, true);
|
||||
|
||||
//find all records in programGroupsLeftToProcess whose ppId points to a record that was just processed (eg. a record that's in currentProgramGroups),
|
||||
//and move them to the list of programGroupsToProcessNext.
|
||||
LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsToProcessNext = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
|
||||
for(final Iterator<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
|
||||
final HeaderRecordAndFileHeader<SAMProgramRecord> pairLeftToProcess = programGroupsLeftToProcessIterator.next();
|
||||
final Object ppIdOfRecordLeftToProcess = pairLeftToProcess.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
|
||||
//find what currentProgramGroups this ppId points to (NOTE: they have to come from the same file)
|
||||
for(final HeaderRecordAndFileHeader<SAMProgramRecord> justProcessedPair : currentProgramGroups) {
|
||||
String idJustProcessed = justProcessedPair.getHeaderRecord().getId();
|
||||
if(pairLeftToProcess.getFileHeader() == justProcessedPair.getFileHeader() && ppIdOfRecordLeftToProcess.equals(idJustProcessed)) {
|
||||
programGroupsLeftToProcessIterator.remove();
|
||||
programGroupsToProcessNext.add(pairLeftToProcess);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
currentProgramGroups = programGroupsToProcessNext;
|
||||
}
|
||||
|
||||
//verify that all records were processed
|
||||
if(!programGroupsLeftToProcess.isEmpty()) {
|
||||
StringBuffer errorMsg = new StringBuffer(programGroupsLeftToProcess.size() + " program groups weren't processed. Do their PP ids point to existing PGs? \n");
|
||||
for( final HeaderRecordAndFileHeader<SAMProgramRecord> pair : programGroupsLeftToProcess ) {
|
||||
SAMProgramRecord record = pair.getHeaderRecord();
|
||||
errorMsg.append("@PG ID:"+record.getProgramGroupId()+" PN:"+record.getProgramName()+" PP:"+record.getPreviousProgramGroupId() +"\n");
|
||||
}
|
||||
throw new PicardException(errorMsg.toString());
|
||||
}
|
||||
|
||||
//sort the result list by record id
|
||||
Collections.sort(overallResult, RECORD_ID_COMPARATOR);
|
||||
|
||||
return overallResult;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Utility method that takes a list of program groups and remaps all their
|
||||
* ids (including ppIds if requested) using the given idTranslationTable.
|
||||
*
|
||||
* NOTE: when remapping, this method creates new SAMProgramRecords and
|
||||
* doesn't mutate any records in the programGroups list.
|
||||
*
|
||||
* @param programGroups The program groups to translate.
|
||||
* @param idTranslationTable The translation table.
|
||||
* @param translatePpIds Whether ppIds should be translated as well.
|
||||
*
|
||||
* @return The list of translated records.
|
||||
*/
|
||||
private List<HeaderRecordAndFileHeader<SAMProgramRecord>> translateIds(
|
||||
List<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroups,
|
||||
Map<SAMFileHeader, Map<String, String>> idTranslationTable,
|
||||
boolean translatePpIds) {
|
||||
|
||||
//go through programGroups and translate any IDs and PPs based on the idTranslationTable.
|
||||
List<HeaderRecordAndFileHeader<SAMProgramRecord>> result = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
|
||||
for(final HeaderRecordAndFileHeader<SAMProgramRecord> pair : programGroups ) {
|
||||
final SAMProgramRecord record = pair.getHeaderRecord();
|
||||
final String id = record.getProgramGroupId();
|
||||
final String ppId = (String) record.getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
|
||||
|
||||
final SAMFileHeader header = pair.getFileHeader();
|
||||
final Map<String, String> translations = idTranslationTable.get(header);
|
||||
|
||||
//see if one or both ids need to be translated
|
||||
SAMProgramRecord translatedRecord = null;
|
||||
if(translations != null)
|
||||
{
|
||||
String translatedId = translations.get( id );
|
||||
String translatedPpId = translatePpIds ? translations.get( ppId ) : null;
|
||||
|
||||
boolean needToTranslateId = translatedId != null && !translatedId.equals(id);
|
||||
boolean needToTranslatePpId = translatedPpId != null && !translatedPpId.equals(ppId);
|
||||
|
||||
if(needToTranslateId && needToTranslatePpId) {
|
||||
translatedRecord = new SAMProgramRecord(translatedId, record);
|
||||
translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
|
||||
} else if(needToTranslateId) {
|
||||
translatedRecord = new SAMProgramRecord(translatedId, record);
|
||||
} else if(needToTranslatePpId) {
|
||||
translatedRecord = new SAMProgramRecord(id, record);
|
||||
translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
|
||||
}
|
||||
}
|
||||
|
||||
if(translatedRecord != null) {
|
||||
result.add(new HeaderRecordAndFileHeader<SAMProgramRecord>(translatedRecord, header));
|
||||
} else {
|
||||
result.add(pair); //keep the original record
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Utility method for merging a List of AbstractSAMHeaderRecords. If it finds
|
||||
* records that have identical ids and attributes, it will collapse them
|
||||
* into one record. If it finds records that have identical ids but
|
||||
* non-identical attributes, this is treated as a collision. When collision happens,
|
||||
* the records' ids are remapped, and an old-id to new-id mapping is added to the idTranslationTable.
|
||||
*
|
||||
* NOTE: Non-collided records also get recorded in the idTranslationTable as
|
||||
* old-id to old-id. This way, an idTranslationTable lookup should never return null.
|
||||
*
|
||||
* @param headerRecords The header records to merge.
|
||||
* @param headerRecordFactory Constructs a specific subclass of AbstractSAMHeaderRecord.
|
||||
* @param idsThatAreAlreadyTaken If the id of a headerRecord matches an id in this set, it will be treated as a collision, and the headRecord's id will be remapped.
|
||||
* @param idTranslationTable When records collide, their ids are remapped, and an old-id to new-id
|
||||
* mapping is added to the idTranslationTable. Non-collided records also get recorded in the idTranslationTable as
|
||||
* old-id to old-id. This way, an idTranslationTable lookup should never return null.
|
||||
*
|
||||
* @param result The list of merged header records.
|
||||
*
|
||||
* @return True if there were collisions.
|
||||
*/
|
||||
private <RecordType extends AbstractSAMHeaderRecord> boolean mergeHeaderRecords(final List<HeaderRecordAndFileHeader<RecordType>> headerRecords, HeaderRecordFactory<RecordType> headerRecordFactory,
|
||||
final HashSet<String> idsThatAreAlreadyTaken, Map<SAMFileHeader, Map<String, String>> idTranslationTable, List<RecordType> result) {
|
||||
|
||||
//The outer Map bins the header records by their ids. The nested Map further collapses
|
||||
//header records which, in addition to having the same id, also have identical attributes.
|
||||
//In other words, each key in the nested map represents one or more
|
||||
//header records which have both identical ids and identical attributes. The List of
|
||||
//SAMFileHeaders keeps track of which readers these header record(s) came from.
|
||||
final Map<String, Map<RecordType, List<SAMFileHeader>>> idToRecord =
|
||||
new HashMap<String, Map<RecordType, List<SAMFileHeader>>>();
|
||||
|
||||
//Populate the idToRecord and seenIds data structures
|
||||
for (final HeaderRecordAndFileHeader<RecordType> pair : headerRecords) {
|
||||
final RecordType record = pair.getHeaderRecord();
|
||||
final SAMFileHeader header = pair.getFileHeader();
|
||||
final String recordId = record.getId();
|
||||
Map<RecordType, List<SAMFileHeader>> recordsWithSameId = idToRecord.get(recordId);
|
||||
if(recordsWithSameId == null) {
|
||||
recordsWithSameId = new LinkedHashMap<RecordType, List<SAMFileHeader>>();
|
||||
idToRecord.put(recordId, recordsWithSameId);
|
||||
}
|
||||
|
||||
List<SAMFileHeader> fileHeaders = recordsWithSameId.get(record);
|
||||
if(fileHeaders == null) {
|
||||
fileHeaders = new LinkedList<SAMFileHeader>();
|
||||
recordsWithSameId.put(record, fileHeaders);
|
||||
}
|
||||
|
||||
fileHeaders.add(header);
|
||||
}
|
||||
|
||||
//Resolve any collisions between header records by remapping their ids.
|
||||
boolean hasCollisions = false;
|
||||
for (final Map.Entry<String, Map<RecordType, List<SAMFileHeader>>> entry : idToRecord.entrySet() )
|
||||
{
|
||||
final String recordId = entry.getKey();
|
||||
final Map<RecordType, List<SAMFileHeader>> recordsWithSameId = entry.getValue();
|
||||
|
||||
|
||||
for( Map.Entry<RecordType, List<SAMFileHeader>> recordWithUniqueAttr : recordsWithSameId.entrySet()) {
|
||||
final RecordType record = recordWithUniqueAttr.getKey();
|
||||
final List<SAMFileHeader> fileHeaders = recordWithUniqueAttr.getValue();
|
||||
|
||||
String newId;
|
||||
if(!idsThatAreAlreadyTaken.contains(recordId)) {
|
||||
//don't remap 1st record. If there are more records
|
||||
//with this id, they will be remapped in the 'else'.
|
||||
newId = recordId;
|
||||
idsThatAreAlreadyTaken.add(recordId);
|
||||
} else {
|
||||
//there is more than one record with this id.
|
||||
hasCollisions = true;
|
||||
|
||||
//find a unique newId for this record
|
||||
int idx=1;
|
||||
while(idsThatAreAlreadyTaken.contains(newId = recordId + "." + Integer.toString(idx++)))
|
||||
;
|
||||
|
||||
idsThatAreAlreadyTaken.add( newId );
|
||||
}
|
||||
|
||||
for(SAMFileHeader fileHeader : fileHeaders) {
|
||||
Map<String, String> readerTranslationTable = idTranslationTable.get(fileHeader);
|
||||
if(readerTranslationTable == null) {
|
||||
readerTranslationTable = new HashMap<String, String>();
|
||||
idTranslationTable.put(fileHeader, readerTranslationTable);
|
||||
}
|
||||
readerTranslationTable.put(recordId, newId);
|
||||
}
|
||||
|
||||
result.add( headerRecordFactory.createRecord(newId, record) );
|
||||
}
|
||||
}
|
||||
|
||||
return hasCollisions;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the sequences off the SAMFileHeader. Throws runtime exception if the sequence
|
||||
* are different from one another.
|
||||
*
|
||||
* @param headers headers to pull sequences from
|
||||
* @return sequences from files. Each file should have the same sequence
|
||||
*/
|
||||
private SAMSequenceDictionary getSequenceDictionary(final Collection<SAMFileHeader> headers) {
|
||||
SAMSequenceDictionary sequences = null;
|
||||
for (final SAMFileHeader header : headers) {
|
||||
|
||||
if (sequences == null) {
|
||||
sequences = header.getSequenceDictionary();
|
||||
}
|
||||
else {
|
||||
final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
|
||||
SequenceUtil.assertSequenceDictionariesEqual(sequences, currentSequences);
|
||||
}
|
||||
}
|
||||
|
||||
return sequences;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sequences from the SAMFileHeader, and merge the resulting sequence dictionaries.
|
||||
*
|
||||
* @param headers headers to pull sequences from
|
||||
* @return sequences from files. Each file should have the same sequence
|
||||
*/
|
||||
private SAMSequenceDictionary mergeSequenceDictionaries(final Collection<SAMFileHeader> headers) {
|
||||
SAMSequenceDictionary sequences = new SAMSequenceDictionary();
|
||||
for (final SAMFileHeader header : headers) {
|
||||
final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
|
||||
sequences = mergeSequences(sequences, currentSequences);
|
||||
}
|
||||
// second pass, make a map of the original seqeunce id -> new sequence id
|
||||
createSequenceMapping(headers, sequences);
|
||||
return sequences;
|
||||
}
|
||||
|
||||
/**
|
||||
* They've asked to merge the sequence headers. What we support right now is finding the sequence name superset.
|
||||
*
|
||||
* @param mergeIntoDict the result of merging so far. All SAMSequenceRecords in here have been cloned from the originals.
|
||||
* @param mergeFromDict A new sequence dictionary to merge into mergeIntoDict.
|
||||
* @return A new sequence dictionary that resulting from merging the two inputs.
|
||||
*/
|
||||
private SAMSequenceDictionary mergeSequences(SAMSequenceDictionary mergeIntoDict, SAMSequenceDictionary mergeFromDict) {
|
||||
|
||||
// a place to hold the sequences that we haven't found a home for, in the order the appear in mergeFromDict.
|
||||
LinkedList<SAMSequenceRecord> holder = new LinkedList<SAMSequenceRecord>();
|
||||
|
||||
// Return value will be created from this.
|
||||
LinkedList<SAMSequenceRecord> resultingDict = new LinkedList<SAMSequenceRecord>();
|
||||
for (final SAMSequenceRecord sequenceRecord : mergeIntoDict.getSequences()) {
|
||||
resultingDict.add(sequenceRecord);
|
||||
}
|
||||
|
||||
// Index into resultingDict of previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
|
||||
int prevloc = -1;
|
||||
// Previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
|
||||
SAMSequenceRecord previouslyMerged = null;
|
||||
|
||||
for (SAMSequenceRecord sequenceRecord : mergeFromDict.getSequences()) {
|
||||
// Does it already exist in resultingDict?
|
||||
int loc = getIndexOfSequenceName(resultingDict, sequenceRecord.getSequenceName());
|
||||
if (loc == -1) {
|
||||
// If doesn't already exist in resultingDict, save it an decide where to insert it later.
|
||||
holder.add(sequenceRecord.clone());
|
||||
} else if (prevloc > loc) {
|
||||
// If sequenceRecord already exists in resultingDict, but prior to the previous one
|
||||
// from mergeIntoDict that already existed, cannot merge.
|
||||
throw new PicardException("Cannot merge sequence dictionaries because sequence " +
|
||||
sequenceRecord.getSequenceName() + " and " + previouslyMerged.getSequenceName() +
|
||||
" are in different orders in two input sequence dictionaries.");
|
||||
} else {
|
||||
// Since sequenceRecord already exists in resultingDict, don't need to add it.
|
||||
// Add in all the sequences prior to it that have been held in holder.
|
||||
resultingDict.addAll(loc, holder);
|
||||
// Remember the index of sequenceRecord so can check for merge imcompatibility.
|
||||
prevloc = loc + holder.size();
|
||||
previouslyMerged = sequenceRecord;
|
||||
holder.clear();
|
||||
}
|
||||
}
|
||||
// Append anything left in holder.
|
||||
if (holder.size() != 0) {
|
||||
resultingDict.addAll(holder);
|
||||
}
|
||||
return new SAMSequenceDictionary(resultingDict);
|
||||
}
|
||||
|
||||
/**
|
||||
* Find sequence in list.
|
||||
* @param list List to search for the sequence name.
|
||||
* @param sequenceName Name to search for.
|
||||
* @return Index of SAMSequenceRecord with the given name in list, or -1 if not found.
|
||||
*/
|
||||
private static int getIndexOfSequenceName(final List<SAMSequenceRecord> list, final String sequenceName) {
|
||||
for (int i = 0; i < list.size(); ++i) {
|
||||
if (list.get(i).getSequenceName().equals(sequenceName)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the sequence mapping. This map is used to convert the unmerged header sequence ID's to the merged
|
||||
* list of sequence id's.
|
||||
* @param headers the collections of headers.
|
||||
* @param masterDictionary the superset dictionary we've created.
|
||||
*/
|
||||
private void createSequenceMapping(final Collection<SAMFileHeader> headers, SAMSequenceDictionary masterDictionary) {
|
||||
LinkedList<String> resultingDictStr = new LinkedList<String>();
|
||||
for (SAMSequenceRecord r : masterDictionary.getSequences()) {
|
||||
resultingDictStr.add(r.getSequenceName());
|
||||
}
|
||||
for (final SAMFileHeader header : headers) {
|
||||
Map<Integer, Integer> seqMap = new HashMap<Integer, Integer>();
|
||||
SAMSequenceDictionary dict = header.getSequenceDictionary();
|
||||
for (SAMSequenceRecord rec : dict.getSequences()) {
|
||||
seqMap.put(rec.getSequenceIndex(), resultingDictStr.indexOf(rec.getSequenceName()));
|
||||
}
|
||||
this.samSeqDictionaryIdTranslationViaHeader.put(header, seqMap);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Returns the read group id that should be used for the input read and RG id.
|
||||
*
|
||||
* @deprecated replaced by getReadGroupId(SAMFileHeader, String)
|
||||
* */
|
||||
public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) {
|
||||
return getReadGroupId(reader.getFileHeader(), originalReadGroupId);
|
||||
}
|
||||
|
||||
/** Returns the read group id that should be used for the input read and RG id. */
|
||||
public String getReadGroupId(final SAMFileHeader header, final String originalReadGroupId) {
|
||||
return this.samReadGroupIdTranslation.get(header).get(originalReadGroupId);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param reader one of the input files
|
||||
* @param originalProgramGroupId a program group ID from the above input file
|
||||
* @return new ID from the merged list of program groups in the output file
|
||||
* @deprecated replaced by getProgramGroupId(SAMFileHeader, String)
|
||||
*/
|
||||
public String getProgramGroupId(final SAMFileReader reader, final String originalProgramGroupId) {
|
||||
return getProgramGroupId(reader.getFileHeader(), originalProgramGroupId);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param header one of the input headers
|
||||
* @param originalProgramGroupId a program group ID from the above input file
|
||||
* @return new ID from the merged list of program groups in the output file
|
||||
*/
|
||||
public String getProgramGroupId(final SAMFileHeader header, final String originalProgramGroupId) {
|
||||
return this.samProgramGroupIdTranslation.get(header).get(originalProgramGroupId);
|
||||
}
|
||||
|
||||
/** Returns true if there are read group duplicates within the merged headers. */
|
||||
public boolean hasReadGroupCollisions() {
|
||||
return this.hasReadGroupCollisions;
|
||||
}
|
||||
|
||||
/** Returns true if there are program group duplicates within the merged headers. */
|
||||
public boolean hasProgramGroupCollisions() {
|
||||
return hasProgramGroupCollisions;
|
||||
}
|
||||
|
||||
/** @return if we've merged the sequence dictionaries, return true */
|
||||
public boolean hasMergedSequenceDictionary() {
|
||||
return hasMergedSequenceDictionary;
|
||||
}
|
||||
|
||||
/** Returns the merged header that should be written to any output merged file. */
|
||||
public SAMFileHeader getMergedHeader() {
|
||||
return this.mergedHeader;
|
||||
}
|
||||
|
||||
/** Returns the collection of readers that this header merger is working with. May return null.
|
||||
* @deprecated replaced by getHeaders()
|
||||
*/
|
||||
public Collection<SAMFileReader> getReaders() {
|
||||
return this.readers;
|
||||
}
|
||||
|
||||
/** Returns the collection of readers that this header merger is working with.
|
||||
*/
|
||||
public Collection<SAMFileHeader> getHeaders() {
|
||||
return this.headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether this header merger contains a given SAM file header. Note that header presence
|
||||
* is confirmed / blocked by == equality, rather than actually testing SAMFileHeader.equals(), for
|
||||
* reasons of performance.
|
||||
* @param header header to check for.
|
||||
* @return True if the header exists in this HeaderMerger. False otherwise.
|
||||
*/
|
||||
boolean containsHeader(SAMFileHeader header) {
|
||||
for(SAMFileHeader headerMergerHeader: headers) {
|
||||
if(headerMergerHeader == header)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the new mapping for a specified reader, given it's old sequence index
|
||||
* @param reader the reader
|
||||
* @param oldReferenceSequenceIndex the old sequence (also called reference) index
|
||||
* @return the new index value
|
||||
* @deprecated replaced by getMergedSequenceIndex(SAMFileHeader, Integer)
|
||||
*/
|
||||
public Integer getMergedSequenceIndex(SAMFileReader reader, Integer oldReferenceSequenceIndex) {
|
||||
return this.getMergedSequenceIndex(reader.getFileHeader(), oldReferenceSequenceIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Another mechanism for getting the new sequence index, for situations in which the reader is not available.
|
||||
* Note that if the SAMRecord has already had its header replaced with the merged header, this won't work.
|
||||
* @param header The original header for the input record in question.
|
||||
* @param oldReferenceSequenceIndex The original sequence index.
|
||||
* @return the new index value that is compatible with the merged sequence index.
|
||||
*/
|
||||
public Integer getMergedSequenceIndex(final SAMFileHeader header, Integer oldReferenceSequenceIndex) {
|
||||
final Map<Integer, Integer> mapping = this.samSeqDictionaryIdTranslationViaHeader.get(header);
|
||||
if (mapping == null) {
|
||||
throw new PicardException("No sequence dictionary mapping available for header: " + header);
|
||||
}
|
||||
|
||||
final Integer newIndex = mapping.get(oldReferenceSequenceIndex);
|
||||
if (newIndex == null) {
|
||||
throw new PicardException("No mapping for reference index " + oldReferenceSequenceIndex + " from header: " + header);
|
||||
}
|
||||
|
||||
return newIndex;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementations of this interface are used by mergeHeaderRecords(..) to instantiate
|
||||
* specific subclasses of AbstractSAMHeaderRecord.
|
||||
*/
|
||||
private static interface HeaderRecordFactory<RecordType extends AbstractSAMHeaderRecord> {
|
||||
|
||||
/**
|
||||
* Constructs a new instance of RecordType.
|
||||
* @param id The id of the new record.
|
||||
* @param srcRecord Except for the id, the new record will be a copy of this source record.
|
||||
*/
|
||||
public RecordType createRecord(final String id, RecordType srcRecord);
|
||||
}
|
||||
|
||||
/**
|
||||
* Struct that groups together a subclass of AbstractSAMHeaderRecord with the
|
||||
* SAMFileHeader that it came from.
|
||||
*/
|
||||
private static class HeaderRecordAndFileHeader<RecordType extends AbstractSAMHeaderRecord> {
|
||||
private RecordType headerRecord;
|
||||
private SAMFileHeader samFileHeader;
|
||||
|
||||
public HeaderRecordAndFileHeader(RecordType headerRecord, SAMFileHeader samFileHeader) {
|
||||
this.headerRecord = headerRecord;
|
||||
this.samFileHeader = samFileHeader;
|
||||
}
|
||||
|
||||
public RecordType getHeaderRecord() {
|
||||
return headerRecord;
|
||||
}
|
||||
public SAMFileHeader getFileHeader() {
|
||||
return samFileHeader;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -64,6 +64,9 @@ import java.util.concurrent.*;
|
|||
public class SAMDataSource {
|
||||
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
|
||||
|
||||
/** If true, we will load SAMReaders in parallel */
|
||||
final private static boolean USE_PARALLEL_LOADING = false;
|
||||
|
||||
/** Backing support for reads. */
|
||||
protected final ReadProperties readProperties;
|
||||
|
||||
|
|
@ -259,6 +262,11 @@ public class SAMDataSource {
|
|||
validationStringency = strictness;
|
||||
if(readBufferSize != null)
|
||||
ReadShard.setReadBufferSize(readBufferSize);
|
||||
else {
|
||||
// Choose a sensible default for the read buffer size. For the moment, we're picking 1000 reads per BAM per shard (which effectively
|
||||
// will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once.
|
||||
ReadShard.setReadBufferSize(Math.min(1000*samFiles.size(),250000));
|
||||
}
|
||||
|
||||
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
|
||||
SAMReaders readers = resourcePool.getAvailableReaders();
|
||||
|
|
@ -479,10 +487,13 @@ public class SAMDataSource {
|
|||
// Cache the most recently viewed read so that we can check whether we've reached the end of a pair.
|
||||
SAMRecord read = null;
|
||||
|
||||
Map<SAMFileReader,GATKBAMFileSpan> positionUpdates = new IdentityHashMap<SAMFileReader,GATKBAMFileSpan>();
|
||||
|
||||
CloseableIterator<SAMRecord> iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate);
|
||||
while(!shard.isBufferFull() && iterator.hasNext()) {
|
||||
read = iterator.next();
|
||||
addReadToBufferingShard(shard,getReaderID(readers,read),read);
|
||||
shard.addRead(read);
|
||||
noteFilePositionUpdate(positionUpdates,read);
|
||||
}
|
||||
|
||||
// If the reads are sorted in queryname order, ensure that all reads
|
||||
|
|
@ -492,11 +503,21 @@ public class SAMDataSource {
|
|||
SAMRecord nextRead = iterator.next();
|
||||
if(read == null || !read.getReadName().equals(nextRead.getReadName()))
|
||||
break;
|
||||
addReadToBufferingShard(shard,getReaderID(readers,nextRead),nextRead);
|
||||
shard.addRead(nextRead);
|
||||
noteFilePositionUpdate(positionUpdates,nextRead);
|
||||
}
|
||||
}
|
||||
|
||||
iterator.close();
|
||||
|
||||
// Make the updates specified by the reader.
|
||||
for(Map.Entry<SAMFileReader,GATKBAMFileSpan> positionUpdate: positionUpdates.entrySet())
|
||||
readerPositions.put(readers.getReaderID(positionUpdate.getKey()),positionUpdate.getValue());
|
||||
}
|
||||
|
||||
private void noteFilePositionUpdate(Map<SAMFileReader,GATKBAMFileSpan> positionMapping, SAMRecord read) {
|
||||
GATKBAMFileSpan endChunk = new GATKBAMFileSpan(read.getFileSource().getFilePointer().getContentsFollowing());
|
||||
positionMapping.put(read.getFileSource().getReader(),endChunk);
|
||||
}
|
||||
|
||||
public StingSAMIterator seek(Shard shard) {
|
||||
|
|
@ -574,18 +595,6 @@ public class SAMDataSource {
|
|||
readProperties.defaultBaseQualities());
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this read to the given shard.
|
||||
* @param shard The shard to which to add the read.
|
||||
* @param id The id of the given reader.
|
||||
* @param read The read to add to the shard.
|
||||
*/
|
||||
private void addReadToBufferingShard(Shard shard,SAMReaderID id,SAMRecord read) {
|
||||
GATKBAMFileSpan endChunk = new GATKBAMFileSpan(read.getFileSource().getFilePointer().getContentsFollowing());
|
||||
shard.addRead(read);
|
||||
readerPositions.put(id,endChunk);
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter reads based on user-specified criteria.
|
||||
*
|
||||
|
|
@ -714,68 +723,98 @@ public class SAMDataSource {
|
|||
* @param validationStringency validation stringency.
|
||||
*/
|
||||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
||||
final int N_THREADS = 8;
|
||||
int totalNumberOfFiles = readerIDs.size();
|
||||
final int totalNumberOfFiles = readerIDs.size();
|
||||
int readerNumber = 1;
|
||||
final SimpleTimer timer = new SimpleTimer().start();
|
||||
|
||||
ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
|
||||
final List<ReaderInitializer> inits = new ArrayList<ReaderInitializer>(totalNumberOfFiles);
|
||||
Queue<Future<ReaderInitializer>> futures = new LinkedList<Future<ReaderInitializer>>();
|
||||
for (SAMReaderID readerID: readerIDs) {
|
||||
logger.debug("Enqueuing for initialization: " + readerID.samFile);
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID);
|
||||
inits.add(init);
|
||||
futures.add(executor.submit(init));
|
||||
}
|
||||
|
||||
final SimpleTimer timer = new SimpleTimer();
|
||||
try {
|
||||
final int MAX_WAIT = 30 * 1000;
|
||||
final int MIN_WAIT = 1 * 1000;
|
||||
|
||||
timer.start();
|
||||
while ( ! futures.isEmpty() ) {
|
||||
final int prevSize = futures.size();
|
||||
final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file
|
||||
final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT));
|
||||
Thread.sleep(waitTimeInMS);
|
||||
|
||||
Queue<Future<ReaderInitializer>> pending = new LinkedList<Future<ReaderInitializer>>();
|
||||
for ( final Future<ReaderInitializer> initFuture : futures ) {
|
||||
if ( initFuture.isDone() ) {
|
||||
final ReaderInitializer init = initFuture.get();
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID));
|
||||
readers.put(init.readerID, init.reader);
|
||||
} else {
|
||||
pending.add(initFuture);
|
||||
}
|
||||
if ( totalNumberOfFiles > 0 ) logger.info("Initializing SAMRecords " + (USE_PARALLEL_LOADING ? "in parallel" : "in serial"));
|
||||
if ( ! USE_PARALLEL_LOADING ) {
|
||||
final int tickSize = 50;
|
||||
int nExecutedTotal = 0;
|
||||
long lastTick = timer.currentTime();
|
||||
for(final SAMReaderID readerID: readerIDs) {
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID).call();
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
|
||||
final int pendingSize = pending.size();
|
||||
final int nExecutedInTick = prevSize - pendingSize;
|
||||
final int nExecutedTotal = totalNumberOfFiles - pendingSize;
|
||||
final double totalTimeInSeconds = timer.getElapsedTime();
|
||||
final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
|
||||
final int nRemaining = pendingSize;
|
||||
final double estTimeToComplete = pendingSize / nTasksPerSecond;
|
||||
logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m",
|
||||
nExecutedInTick, (int)(waitTimeInMS / 1000.0),
|
||||
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
|
||||
nRemaining, estTimeToComplete, estTimeToComplete / 60));
|
||||
|
||||
futures = pending;
|
||||
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile));
|
||||
readers.put(init.readerID,init.reader);
|
||||
if ( ++nExecutedTotal % tickSize == 0) {
|
||||
double tickInSec = (timer.currentTime() - lastTick) / 1000.0;
|
||||
printReaderPerformance(nExecutedTotal, tickSize, totalNumberOfFiles, timer, tickInSec);
|
||||
lastTick = timer.currentTime();
|
||||
}
|
||||
}
|
||||
} catch ( InterruptedException e ) {
|
||||
throw new ReviewedStingException("Interrupted SAMReader initialization", e);
|
||||
} catch ( ExecutionException e ) {
|
||||
throw new ReviewedStingException("Execution exception during SAMReader initialization", e);
|
||||
} else {
|
||||
final int N_THREADS = 8;
|
||||
|
||||
final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
|
||||
final List<ReaderInitializer> inits = new ArrayList<ReaderInitializer>(totalNumberOfFiles);
|
||||
Queue<Future<ReaderInitializer>> futures = new LinkedList<Future<ReaderInitializer>>();
|
||||
for (final SAMReaderID readerID: readerIDs) {
|
||||
logger.debug("Enqueuing for initialization: " + readerID.samFile);
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID);
|
||||
inits.add(init);
|
||||
futures.add(executor.submit(init));
|
||||
}
|
||||
|
||||
try {
|
||||
final int MAX_WAIT = 30 * 1000;
|
||||
final int MIN_WAIT = 1 * 1000;
|
||||
|
||||
while ( ! futures.isEmpty() ) {
|
||||
final int prevSize = futures.size();
|
||||
final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file
|
||||
final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT));
|
||||
Thread.sleep(waitTimeInMS);
|
||||
|
||||
Queue<Future<ReaderInitializer>> pending = new LinkedList<Future<ReaderInitializer>>();
|
||||
for ( final Future<ReaderInitializer> initFuture : futures ) {
|
||||
if ( initFuture.isDone() ) {
|
||||
final ReaderInitializer init = initFuture.get();
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID));
|
||||
readers.put(init.readerID, init.reader);
|
||||
} else {
|
||||
pending.add(initFuture);
|
||||
}
|
||||
}
|
||||
|
||||
final int nExecutedTotal = totalNumberOfFiles - pending.size();
|
||||
final int nExecutedInTick = prevSize - pending.size();
|
||||
printReaderPerformance(nExecutedTotal, nExecutedInTick, totalNumberOfFiles, timer, waitTimeInMS / 1000.0);
|
||||
futures = pending;
|
||||
}
|
||||
} catch ( InterruptedException e ) {
|
||||
throw new ReviewedStingException("Interrupted SAMReader initialization", e);
|
||||
} catch ( ExecutionException e ) {
|
||||
throw new ReviewedStingException("Execution exception during SAMReader initialization", e);
|
||||
}
|
||||
|
||||
executor.shutdown();
|
||||
}
|
||||
|
||||
logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
|
||||
executor.shutdown();
|
||||
if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
|
||||
}
|
||||
|
||||
final private void printReaderPerformance(final int nExecutedTotal,
|
||||
final int nExecutedInTick,
|
||||
final int totalNumberOfFiles,
|
||||
final SimpleTimer timer,
|
||||
final double tickDurationInSec) {
|
||||
final int pendingSize = totalNumberOfFiles - nExecutedTotal;
|
||||
final double totalTimeInSeconds = timer.getElapsedTime();
|
||||
final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
|
||||
final int nRemaining = pendingSize;
|
||||
final double estTimeToComplete = pendingSize / nTasksPerSecond;
|
||||
logger.info(String.format("Init %d BAMs in last %.2f s, %d of %d in %.2f s / %.2f m (%.2f tasks/s). %d remaining with est. completion in %.2f s / %.2f m",
|
||||
nExecutedInTick, tickDurationInSec,
|
||||
nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
|
||||
nRemaining, estTimeToComplete, estTimeToComplete / 60));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -175,7 +175,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
/////////////////////////////
|
||||
// Debug Arguments
|
||||
/////////////////////////////
|
||||
@Hidden
|
||||
@Advanced
|
||||
@Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
|
||||
protected Boolean TRUST_ALL_POLYMORPHIC = false;
|
||||
//@Hidden
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.*;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
|
|
@ -42,6 +43,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -140,7 +142,7 @@ import java.util.*;
|
|||
* -R ref.fasta \
|
||||
* -T SelectVariants \
|
||||
* --variant input.vcf \
|
||||
* -family NA12891+NA12892=NA12878 \
|
||||
* -bed family.ped \
|
||||
* -mvq 50 \
|
||||
* -o violations.vcf
|
||||
*
|
||||
|
|
@ -250,16 +252,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
@Argument(fullName="keepOriginalAC", shortName="keepOriginalAC", doc="Don't update the AC, AF, or AN values in the INFO field after selecting", required=false)
|
||||
private boolean KEEP_ORIGINAL_CHR_COUNTS = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="family_structure_file", shortName="familyFile", doc="use -family unless you know what you're doing", required=false)
|
||||
private File FAMILY_STRUCTURE_FILE = null;
|
||||
|
||||
/**
|
||||
* String formatted as dad+mom=child where these parameters determine which sample names are examined.
|
||||
*/
|
||||
@Argument(fullName="family_structure", shortName="family", doc="string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
|
||||
private String FAMILY_STRUCTURE = "";
|
||||
|
||||
/**
|
||||
* This activates the mendelian violation module that will select all variants that correspond to a mendelian violation following the rules given by the family structure.
|
||||
*/
|
||||
|
|
@ -286,13 +278,21 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
private double fractionGenotypes = 0;
|
||||
|
||||
/**
|
||||
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||
* This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria.
|
||||
* When specified one or more times, a particular type of variant is selected.
|
||||
*
|
||||
*/
|
||||
*/
|
||||
@Argument(fullName="selectTypeToInclude", shortName="selectType", doc="Select only a certain type of variants from the input file. Valid types are INDEL, SNP, MIXED, MNP, SYMBOLIC, NO_VARIATION. Can be specified multiple times", required=false)
|
||||
private List<VariantContext.Type> TYPES_TO_INCLUDE = new ArrayList<VariantContext.Type>();
|
||||
|
||||
/**
|
||||
* If provided, we will only include variants whose ID field is present in this list of ids. The matching
|
||||
* is exact string matching. The file format is just one ID per line
|
||||
*
|
||||
*/
|
||||
@Argument(fullName="keepIDs", shortName="IDs", doc="Only emit sites whose ID is found in this file (one ID per line)", required=false)
|
||||
private File rsIDFile = null;
|
||||
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
|
||||
|
|
@ -313,9 +313,9 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
public enum NumberAlleleRestriction {
|
||||
ALL,
|
||||
BIALLELIC,
|
||||
MULTIALLELIC
|
||||
ALL,
|
||||
BIALLELIC,
|
||||
MULTIALLELIC
|
||||
}
|
||||
|
||||
private ArrayList<VariantContext.Type> selectedTypes = new ArrayList<VariantContext.Type>();
|
||||
|
|
@ -339,17 +339,13 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
private int positionToAdd = 0;
|
||||
private RandomVariantStructure [] variantArray;
|
||||
|
||||
|
||||
/* Variables used for random selection with AF boosting */
|
||||
private ArrayList<Double> afBreakpoints = null;
|
||||
private ArrayList<Double> afBoosts = null;
|
||||
double bkDelta = 0.0;
|
||||
|
||||
private PrintStream outMVFileStream = null;
|
||||
|
||||
//Random number generator for the genotypes to remove
|
||||
//Random number generator for the genotypes to remove
|
||||
private Random randomGenotypes = new Random();
|
||||
|
||||
private Set<String> IDsToKeep = null;
|
||||
|
||||
/**
|
||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||
*/
|
||||
|
|
@ -437,7 +433,18 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + 100.0*fractionRandom + "% of the variants at random from the variant track");
|
||||
|
||||
|
||||
|
||||
/** load in the IDs file to a hashset for matching */
|
||||
if ( rsIDFile != null ) {
|
||||
IDsToKeep = new HashSet<String>();
|
||||
try {
|
||||
for ( final String line : new XReadLines(rsIDFile).readLines() ) {
|
||||
IDsToKeep.add(line.trim());
|
||||
}
|
||||
logger.info("Selecting only variants with one of " + IDsToKeep.size() + " IDs from " + rsIDFile);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(rsIDFile, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -460,20 +467,23 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
}
|
||||
|
||||
for (VariantContext vc : vcs) {
|
||||
if ( IDsToKeep != null && ! IDsToKeep.contains(vc.getID()) )
|
||||
continue;
|
||||
|
||||
if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1)
|
||||
break;
|
||||
break;
|
||||
|
||||
if (outMVFile != null){
|
||||
for( String familyId : mv.getViolationFamilies()){
|
||||
for(Sample sample : this.getSampleDB().getFamily(familyId)){
|
||||
if(sample.getParents().size() > 0){
|
||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
||||
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
||||
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
|
||||
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
|
||||
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
|
||||
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
|
||||
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
|
||||
sample.getMaternalID(), sample.getPaternalID(), sample.getID(),
|
||||
vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(),
|
||||
vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() );
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -513,10 +523,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
|
||||
vcfWriter.add(sub);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
@ -647,7 +654,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
// if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate)
|
||||
if ( vc.getAlleles().size() != sub.getAlleles().size() )
|
||||
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||
newGC = VariantContextUtils.stripPLs(sub.getGenotypes());
|
||||
|
||||
//Remove a fraction of the genotypes if needed
|
||||
if(fractionGenotypes>0){
|
||||
|
|
@ -655,10 +662,10 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
for ( Genotype genotype : newGC ) {
|
||||
//Set genotype to no call if it falls in the fraction.
|
||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(2);
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
alleles.add(Allele.create((byte)'.'));
|
||||
genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap<String, Object>(),false));
|
||||
}
|
||||
else{
|
||||
genotypes.add(genotype);
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ public class RScriptExecutor {
|
|||
|
||||
List<File> tempFiles = new ArrayList<File>();
|
||||
try {
|
||||
File tempLibDir = IOUtils.tempDir("R.", ".lib");
|
||||
File tempLibDir = IOUtils.tempDir("Rlib.", "");
|
||||
tempFiles.add(tempLibDir);
|
||||
|
||||
StringBuilder expression = new StringBuilder("tempLibDir = '").append(tempLibDir).append("';");
|
||||
|
|
|
|||
|
|
@ -654,4 +654,18 @@ public class Utils {
|
|||
// handle exception
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static byte [] arrayFromArrayWithLength(byte[] array, int length) {
|
||||
byte [] output = new byte[length];
|
||||
for (int j = 0; j < length; j++)
|
||||
output[j] = array[(j % array.length)];
|
||||
return output;
|
||||
}
|
||||
|
||||
public static void fillArrayWithByte(byte[] array, byte value) {
|
||||
for (int i=0; i<array.length; i++)
|
||||
array[i] = value;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -460,17 +460,20 @@ public class ClippingOp {
|
|||
int newShift = 0;
|
||||
int oldShift = 0;
|
||||
|
||||
boolean readHasStarted = false; // if the new cigar is composed of S and H only, we have to traverse the entire old cigar to calculate the shift
|
||||
for (CigarElement cigarElement : newCigar.getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP)
|
||||
newShift += cigarElement.getLength();
|
||||
else
|
||||
else {
|
||||
readHasStarted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (CigarElement cigarElement : oldCigar.getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.HARD_CLIP || cigarElement.getOperator() == CigarOperator.SOFT_CLIP )
|
||||
oldShift += Math.min(cigarElement.getLength(), newShift - oldShift);
|
||||
else
|
||||
else if (readHasStarted)
|
||||
break;
|
||||
}
|
||||
return newShift - oldShift;
|
||||
|
|
|
|||
|
|
@ -63,12 +63,18 @@ public class ReadClipper {
|
|||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
|
||||
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
|
||||
return new GATKSAMRecord(read.getHeader());
|
||||
|
||||
if (start < 0 || stop > read.getReadLength() - 1)
|
||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||
|
||||
if ( start > stop )
|
||||
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
||||
|
||||
if ( start > 0 && stop < read.getReadLength() - 1)
|
||||
throw new ReviewedStingException(String.format("Trying to clip the middle of the read: start %d, stop %d, cigar: %s", start, stop, read.getCigarString()));
|
||||
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
GATKSAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
this.ops = null;
|
||||
|
|
@ -76,6 +82,9 @@ public class ReadClipper {
|
|||
}
|
||||
|
||||
public GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
|
||||
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
|
||||
return new GATKSAMRecord(read.getHeader());
|
||||
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
|
@ -172,7 +181,7 @@ public class ReadClipper {
|
|||
//check if the clipped read can still be clipped in the range requested
|
||||
if (op.start < clippedRead.getReadLength()) {
|
||||
ClippingOp fixedOperation = op;
|
||||
if (op.stop > clippedRead.getReadLength())
|
||||
if (op.stop >= clippedRead.getReadLength())
|
||||
fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1);
|
||||
|
||||
clippedRead = fixedOperation.apply(algorithm, clippedRead);
|
||||
|
|
@ -195,16 +204,12 @@ public class ReadClipper {
|
|||
|
||||
for(CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP &&
|
||||
cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.DELETION)
|
||||
cigarElement.getOperator() != CigarOperator.INSERTION)
|
||||
break;
|
||||
|
||||
else if (cigarElement.getOperator() == CigarOperator.INSERTION) {
|
||||
else if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||
this.addOp(new ClippingOp(0, cigarElement.getLength() - 1));
|
||||
}
|
||||
|
||||
else if (cigarElement.getOperator() == CigarOperator.DELETION) {
|
||||
throw new ReviewedStingException("No read should start with a deletion. Aligner bug?");
|
||||
}
|
||||
}
|
||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ public class IOUtils {
|
|||
tempDirParent = FileUtils.getTempDirectory();
|
||||
if (!tempDirParent.exists() && !tempDirParent.mkdirs())
|
||||
throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent);
|
||||
File temp = File.createTempFile(prefix + "-", suffix, tempDirParent);
|
||||
File temp = File.createTempFile(prefix, suffix, tempDirParent);
|
||||
if (!temp.delete())
|
||||
throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath());
|
||||
if (!temp.mkdir())
|
||||
|
|
|
|||
|
|
@ -910,4 +910,9 @@ public class ReadUtils {
|
|||
return comp.compare(read1, read2);
|
||||
}
|
||||
|
||||
// TEST UTILITIES
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -619,8 +619,9 @@ public class VariantContextUtils {
|
|||
if (vc.alleles.size() == 1)
|
||||
continue;
|
||||
if ( hasPLIncompatibleAlleles(alleles, vc.alleles)) {
|
||||
logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
|
||||
genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles));
|
||||
if ( ! genotypes.isEmpty() )
|
||||
logger.warn(String.format("Stripping PLs at %s due incompatible alleles merged=%s vs. single=%s",
|
||||
genomeLocParser.createGenomeLoc(vc), alleles, vc.alleles));
|
||||
genotypes = stripPLs(genotypes);
|
||||
// this will remove stale AC,AF attributed from vc
|
||||
calculateChromosomeCounts(vc, attributes, true);
|
||||
|
|
|
|||
|
|
@ -1,18 +1,12 @@
|
|||
package org.broadinstitute.sting;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.log4j.*;
|
||||
import org.apache.log4j.spi.LoggingEvent;
|
||||
import org.broadinstitute.sting.commandline.CommandLineUtils;
|
||||
import org.broadinstitute.sting.gatk.walkers.diffengine.DiffEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.utils.io.IOUtils;
|
||||
|
||||
import javax.swing.*;
|
||||
import java.io.*;
|
||||
import java.math.BigInteger;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -78,8 +72,8 @@ public abstract class BaseTest {
|
|||
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
|
||||
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
|
||||
|
||||
public static final String networkTempDir = "/broad/shptmp/" + System.getProperty("user.name") + "/";
|
||||
public static final File networkTempDirFile = new File(networkTempDir);
|
||||
public static final String networkTempDir;
|
||||
public static final File networkTempDirFile;
|
||||
|
||||
public static final File testDirFile = new File("public/testdata/");
|
||||
public static final String testDir = testDirFile.getAbsolutePath() + "/";
|
||||
|
|
@ -99,6 +93,10 @@ public abstract class BaseTest {
|
|||
// Set the Root logger to only output warnings.
|
||||
logger.setLevel(Level.WARN);
|
||||
|
||||
networkTempDirFile = IOUtils.tempDir("temp.", ".dir", new File("/broad/shptmp/" + System.getProperty("user.name")));
|
||||
networkTempDirFile.deleteOnExit();
|
||||
networkTempDir = networkTempDirFile.getAbsolutePath() + "/";
|
||||
|
||||
// find our file sources
|
||||
// if (!fileExist(hg18Reference) || !fileExist(hg19Reference) || !fileExist(b36KGReference)) {
|
||||
// logger.fatal("We can't locate the reference directories. Aborting!");
|
||||
|
|
@ -233,18 +231,12 @@ public abstract class BaseTest {
|
|||
|
||||
/**
|
||||
* Creates a temp file that will be deleted on exit after tests are complete.
|
||||
* @param name Prefix of the file.
|
||||
* @param extension Extension to concat to the end of the file.
|
||||
* @return A file in the network temporary directory starting with name, ending with extension, which will be deleted after the program exits.
|
||||
* @param name Name of the file.
|
||||
* @return A file in the network temporary directory with name, which will be deleted after the program exits.
|
||||
*/
|
||||
public static File createNetworkTempFile(String name, String extension) {
|
||||
try {
|
||||
FileUtils.forceMkdir(networkTempDirFile);
|
||||
File file = File.createTempFile(name, extension, networkTempDirFile);
|
||||
file.deleteOnExit();
|
||||
return file;
|
||||
} catch (IOException ex) {
|
||||
throw new ReviewedStingException("Cannot create temp file: " + ex.getMessage(), ex);
|
||||
}
|
||||
public static File createNetworkTempFile(String name) {
|
||||
File file = new File(networkTempDirFile, name);
|
||||
file.deleteOnExit();
|
||||
return file;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ public class JnaSessionIntegrationTest extends BaseTest {
|
|||
return;
|
||||
}
|
||||
|
||||
File outFile = createNetworkTempFile("JnaSessionIntegrationTest-", ".out");
|
||||
File outFile = createNetworkTempFile("JnaSessionIntegrationTest.out");
|
||||
Session session = factory.getSession();
|
||||
session.init(null);
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ public class LibDrmaaIntegrationTest extends BaseTest {
|
|||
|
||||
@Test(dependsOnMethods = { "testDrmaa" })
|
||||
public void testSubmitEcho() throws Exception {
|
||||
if (implementation.indexOf("LSF") >= 0) {
|
||||
if (implementation.contains("LSF")) {
|
||||
System.err.println(" *********************************************************");
|
||||
System.err.println(" ***********************************************************");
|
||||
System.err.println(" **** ****");
|
||||
|
|
@ -101,7 +101,7 @@ public class LibDrmaaIntegrationTest extends BaseTest {
|
|||
Memory error = new Memory(LibDrmaa.DRMAA_ERROR_STRING_BUFFER);
|
||||
int errnum;
|
||||
|
||||
File outFile = createNetworkTempFile("LibDrmaaIntegrationTest-", ".out");
|
||||
File outFile = createNetworkTempFile("LibDrmaaIntegrationTest.out");
|
||||
|
||||
errnum = LibDrmaa.drmaa_init(null, error, LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN);
|
||||
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class LibBatIntegrationTest extends BaseTest {
|
|||
@Test
|
||||
public void testSubmitEcho() throws Exception {
|
||||
String queue = "hour";
|
||||
File outFile = createNetworkTempFile("LibBatIntegrationTest-", ".out");
|
||||
File outFile = createNetworkTempFile("LibBatIntegrationTest.out");
|
||||
|
||||
submit req = new submit();
|
||||
|
||||
|
|
|
|||
|
|
@ -1,18 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: roger
|
||||
* Date: 11/29/11
|
||||
* Time: 4:53 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class CigarStringTestPair {
|
||||
public String toTest;
|
||||
public String expected;
|
||||
|
||||
public CigarStringTestPair(String ToTest, String Expected) {
|
||||
this.toTest = ToTest;
|
||||
this.expected = Expected;
|
||||
}
|
||||
}
|
||||
|
|
@ -2,8 +2,8 @@ package org.broadinstitute.sting.utils.clipreads;
|
|||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.TextCigarCodec;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -21,9 +21,149 @@ public class ClipReadsTestUtils {
|
|||
//Should contain all the utils needed for tests to mass produce
|
||||
//reads, cigars, and other needed classes
|
||||
|
||||
final static String BASES = "ACTG";
|
||||
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
|
||||
final static byte [] BASES = {'A', 'C', 'T', 'G'};
|
||||
final static byte [] QUALS = {2, 15, 25, 30};
|
||||
final static String CIGAR = "4M";
|
||||
final static CigarElement[] cigarElements = { new CigarElement(1, CigarOperator.HARD_CLIP),
|
||||
new CigarElement(1, CigarOperator.SOFT_CLIP),
|
||||
new CigarElement(1, CigarOperator.INSERTION),
|
||||
new CigarElement(1, CigarOperator.DELETION),
|
||||
new CigarElement(1, CigarOperator.MATCH_OR_MISMATCH)};
|
||||
|
||||
|
||||
public static GATKSAMRecord makeReadFromCigar(Cigar cigar) {
|
||||
return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* This function generates every valid permutation of cigar strings with a given length.
|
||||
*
|
||||
* A valid cigar object obeys the following rules:
|
||||
* - No Hard/Soft clips in the middle of the read
|
||||
* - No deletions in the beginning / end of the read
|
||||
* - No repeated adjacent element (e.g. 1M2M -> this should be 3M)
|
||||
*
|
||||
* @param maximumLength the maximum number of elements in the cigar
|
||||
* @return a list with all valid Cigar objects
|
||||
*/
|
||||
public static List<Cigar> generateCigarList(int maximumLength) {
|
||||
int numCigarElements = cigarElements.length;
|
||||
LinkedList<Cigar> cigarList = new LinkedList<Cigar>();
|
||||
byte [] cigarCombination = new byte[maximumLength];
|
||||
|
||||
Utils.fillArrayWithByte(cigarCombination, (byte) 0); // we start off with all 0's in the combination array.
|
||||
int currentIndex = 0;
|
||||
while (true) {
|
||||
Cigar cigar = createCigarFromCombination(cigarCombination); // create the cigar
|
||||
cigar = combineAdjacentCigarElements(cigar); // combine adjacent elements
|
||||
if (isCigarValid(cigar)) { // check if it's valid
|
||||
cigarList.add(cigar); // add it
|
||||
}
|
||||
|
||||
boolean currentIndexChanged = false;
|
||||
while (currentIndex < maximumLength && cigarCombination[currentIndex] == numCigarElements - 1) {
|
||||
currentIndex++; // find the next index to increment
|
||||
currentIndexChanged = true; // keep track of the fact that we have changed indices!
|
||||
}
|
||||
|
||||
if (currentIndex == maximumLength) // if we hit the end of the array, we're done.
|
||||
break;
|
||||
|
||||
cigarCombination[currentIndex]++; // otherwise advance the current index
|
||||
|
||||
if (currentIndexChanged) { // if we have changed index, then...
|
||||
for (int i = 0; i < currentIndex; i++)
|
||||
cigarCombination[i] = 0; // reset everything from 0->currentIndex
|
||||
currentIndex = 0; // go back to the first index
|
||||
}
|
||||
}
|
||||
|
||||
return cigarList;
|
||||
}
|
||||
|
||||
private static boolean isCigarValid(Cigar cigar) {
|
||||
if (cigar.isValid(null, -1) == null) { // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation)
|
||||
|
||||
Stack<CigarElement> cigarElementStack = new Stack<CigarElement>(); // Stack to invert cigar string to find ending operator
|
||||
CigarOperator startingOp = null;
|
||||
CigarOperator endingOp = null;
|
||||
|
||||
// check if it doesn't start with deletions
|
||||
boolean readHasStarted = false; // search the list of elements for the starting operator
|
||||
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
||||
if (!readHasStarted) {
|
||||
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
|
||||
readHasStarted = true;
|
||||
startingOp = cigarElement.getOperator();
|
||||
}
|
||||
}
|
||||
cigarElementStack.push(cigarElement);
|
||||
}
|
||||
|
||||
readHasStarted = false; // search the inverted list of elements (stack) for the stopping operator
|
||||
while (!cigarElementStack.empty()) {
|
||||
CigarElement cigarElement = cigarElementStack.pop();
|
||||
if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
|
||||
readHasStarted = true;
|
||||
endingOp = cigarElement.getOperator();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
|
||||
return true; // we don't accept reads starting or ending in deletions (add any other constraint here)
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private static Cigar createCigarFromCombination(byte[] cigarCombination) {
|
||||
Cigar cigar = new Cigar();
|
||||
for (byte i : cigarCombination) {
|
||||
cigar.add(cigarElements[i]);
|
||||
}
|
||||
return cigar;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Combines equal adjacent elements of a Cigar object
|
||||
*
|
||||
* @param rawCigar the cigar object
|
||||
* @return a combined cigar object
|
||||
*/
|
||||
private static Cigar combineAdjacentCigarElements(Cigar rawCigar) {
|
||||
Cigar combinedCigar = new Cigar();
|
||||
CigarElement lastElement = null;
|
||||
int lastElementLength = 0;
|
||||
for (CigarElement cigarElement : rawCigar.getCigarElements()) {
|
||||
if (lastElement != null && lastElement.getOperator() == cigarElement.getOperator())
|
||||
lastElementLength += cigarElement.getLength();
|
||||
else
|
||||
{
|
||||
if (lastElement != null)
|
||||
combinedCigar.add(new CigarElement(lastElementLength, lastElement.getOperator()));
|
||||
|
||||
lastElement = cigarElement;
|
||||
lastElementLength = cigarElement.getLength();
|
||||
}
|
||||
}
|
||||
if (lastElement != null)
|
||||
combinedCigar.add(new CigarElement(lastElementLength, lastElement.getOperator()));
|
||||
|
||||
return combinedCigar;
|
||||
}
|
||||
|
||||
public static GATKSAMRecord makeRead() {
|
||||
return ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Asserts that the two reads have the same bases, qualities and cigar strings
|
||||
*
|
||||
* @param actual the calculated read
|
||||
* @param expected the expected read
|
||||
*/
|
||||
public static void assertEqualReads(GATKSAMRecord actual, GATKSAMRecord expected) {
|
||||
// If they're both not empty, test their contents
|
||||
if(!actual.isEmpty() && !expected.isEmpty()) {
|
||||
|
|
@ -36,162 +176,16 @@ public class ClipReadsTestUtils {
|
|||
Assert.assertEquals(actual.isEmpty(), expected.isEmpty());
|
||||
}
|
||||
|
||||
public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
|
||||
// Because quals to char start at 33 for visibility
|
||||
baseQuals = subtractToArray(baseQuals, 33);
|
||||
public static Cigar invertCigar (Cigar cigar) {
|
||||
Stack<CigarElement> cigarStack = new Stack<CigarElement>();
|
||||
for (CigarElement cigarElement : cigar.getCigarElements())
|
||||
cigarStack.push(cigarElement);
|
||||
|
||||
Assert.assertEquals(read.getReadBases(), readBases);
|
||||
Assert.assertEquals(read.getBaseQualities(), baseQuals);
|
||||
Assert.assertEquals(read.getCigarString(), cigar);
|
||||
Cigar invertedCigar = new Cigar();
|
||||
while (!cigarStack.isEmpty())
|
||||
invertedCigar.add(cigarStack.pop());
|
||||
|
||||
return invertedCigar;
|
||||
}
|
||||
|
||||
public static void testCigar(GATKSAMRecord read, String cigar) {
|
||||
Assert.assertEquals(read.getCigarString(), cigar);
|
||||
}
|
||||
|
||||
public static void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) {
|
||||
// Because quals to chars start at 33 for visibility
|
||||
baseQuals = subtractToArray(baseQuals, 33);
|
||||
|
||||
if (readBases.length > 0 && baseQuals.length > 0) {
|
||||
Assert.assertEquals(read.getReadBases(), readBases);
|
||||
Assert.assertEquals(read.getBaseQualities(), baseQuals);
|
||||
} else
|
||||
Assert.assertTrue(read.isEmpty());
|
||||
}
|
||||
|
||||
public static byte[] subtractToArray(byte[] array, int n) {
|
||||
if (array == null)
|
||||
return null;
|
||||
|
||||
byte[] output = new byte[array.length];
|
||||
|
||||
for (int i = 0; i < array.length; i++)
|
||||
output[i] = (byte) (array[i] - n);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
// What the test read looks like
|
||||
// Ref: 10 11 12 13 14 15 16 17
|
||||
// Read: 0 1 2 3 - - - -
|
||||
// --------------------------------
|
||||
// Bases: A C T G - - - -
|
||||
// Quals: ! + 5 ? - - - -
|
||||
|
||||
public static GATKSAMRecord makeRead() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, BASES.length());
|
||||
output.setReadBases(new String(BASES).getBytes());
|
||||
output.setBaseQualityString(new String(QUALS));
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
public static GATKSAMRecord makeReadFromCigar(Cigar cigar) {
|
||||
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, cigar.getReadLength());
|
||||
output.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes());
|
||||
output.setBaseQualityString(cycleString(QUALS, cigar.getReadLength()));
|
||||
output.setCigar(cigar);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
private static String cycleString(String string, int length) {
|
||||
String output = "";
|
||||
int cycles = (length / string.length()) + 1;
|
||||
|
||||
for (int i = 1; i < cycles; i++)
|
||||
output += string;
|
||||
|
||||
for (int j = 0; output.length() < length; j++)
|
||||
output += string.charAt(j % string.length());
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
public static Set<Cigar> generateCigars() {
|
||||
|
||||
// This function generates every permutation of cigar strings we need.
|
||||
|
||||
LinkedHashSet<Cigar> output = new LinkedHashSet<Cigar>();
|
||||
|
||||
List<Cigar> clippingOptionsStart = new LinkedList<Cigar>();
|
||||
clippingOptionsStart.add(new Cigar());
|
||||
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H1S"));
|
||||
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1S"));
|
||||
clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H"));
|
||||
|
||||
LinkedList<Cigar> clippingOptionsEnd = new LinkedList<Cigar>();
|
||||
clippingOptionsEnd.add(new Cigar());
|
||||
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S1H"));
|
||||
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S"));
|
||||
clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1H"));
|
||||
|
||||
|
||||
LinkedList<Cigar> indelOptions1 = new LinkedList<Cigar>();
|
||||
indelOptions1.add(new Cigar());
|
||||
//indelOptions1.add( TextCigarCodec.getSingleton().decode("1I1D"));
|
||||
//indelOptions1.add( TextCigarCodec.getSingleton().decode("1D1I") );
|
||||
indelOptions1.add(TextCigarCodec.getSingleton().decode("1I"));
|
||||
indelOptions1.add(TextCigarCodec.getSingleton().decode("1D"));
|
||||
|
||||
LinkedList<Cigar> indelOptions2 = new LinkedList<Cigar>();
|
||||
indelOptions2.add(new Cigar());
|
||||
indelOptions2.add(TextCigarCodec.getSingleton().decode("1I"));
|
||||
indelOptions2.add(null);
|
||||
|
||||
|
||||
// Start With M as base CigarElements, M,
|
||||
|
||||
LinkedList<Cigar> base = new LinkedList<Cigar>();
|
||||
base.add(TextCigarCodec.getSingleton().decode("1M"));
|
||||
base.add(TextCigarCodec.getSingleton().decode("5M"));
|
||||
base.add(TextCigarCodec.getSingleton().decode("25M"));
|
||||
// Should indel be added as a base?
|
||||
|
||||
// Nested loops W00t!
|
||||
for (Cigar Base : base) {
|
||||
for (Cigar indelStart : indelOptions1) {
|
||||
for (Cigar indelEnd : indelOptions2) {
|
||||
for (Cigar clipStart : clippingOptionsStart) {
|
||||
for (Cigar clipEnd : clippingOptionsEnd) {
|
||||
// Create a list of Cigar Elements and construct Cigar
|
||||
List<CigarElement> CigarBuilder = new ArrayList<CigarElement>();
|
||||
// add starting clipping (H/S)
|
||||
CigarBuilder.addAll(clipStart.getCigarElements());
|
||||
// add first base (M)
|
||||
CigarBuilder.addAll(Base.getCigarElements());
|
||||
// add first indel
|
||||
CigarBuilder.addAll(indelStart.getCigarElements());
|
||||
// add second base (M)
|
||||
CigarBuilder.addAll(Base.getCigarElements());
|
||||
// add another indel or nothing (M)
|
||||
if (indelEnd != null)
|
||||
CigarBuilder.addAll(indelEnd.getCigarElements());
|
||||
// add final clipping (S/H)
|
||||
CigarBuilder.addAll(clipEnd.getCigarElements());
|
||||
|
||||
|
||||
output.add(new Cigar(removeConsecutiveElements(CigarBuilder)));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
private static List<CigarElement> removeConsecutiveElements(List<CigarElement> cigarBuilder) {
|
||||
LinkedList<CigarElement> output = new LinkedList<CigarElement>();
|
||||
for (CigarElement E : cigarBuilder) {
|
||||
if (output.isEmpty() || output.getLast().getOperator() != E.getOperator())
|
||||
output.add(E);
|
||||
}
|
||||
return output;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,8 +5,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -27,43 +25,43 @@ public class ClippingOpUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
private void testHardClip() {
|
||||
List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
|
||||
testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
|
||||
testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
|
||||
testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
|
||||
testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
|
||||
testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
|
||||
|
||||
for (TestParameter p : testList) {
|
||||
init();
|
||||
clippingOp = new ClippingOp(p.inputStart, p.inputStop);
|
||||
logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
|
||||
ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
|
||||
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
p.cigar);
|
||||
}
|
||||
// List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
// testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
|
||||
// testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
|
||||
// testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
|
||||
// testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
|
||||
// testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
|
||||
// testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
|
||||
//
|
||||
// for (TestParameter p : testList) {
|
||||
// init();
|
||||
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
|
||||
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
|
||||
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
|
||||
// ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
// ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
// p.cigar);
|
||||
// }
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
private void testSoftClip() {
|
||||
List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
|
||||
testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
|
||||
testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
|
||||
testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
|
||||
testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
|
||||
testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
|
||||
|
||||
for (TestParameter p : testList) {
|
||||
init();
|
||||
clippingOp = new ClippingOp(p.inputStart, p.inputStop);
|
||||
logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
|
||||
ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
|
||||
ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
|
||||
}
|
||||
// List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
// testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
|
||||
// testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
|
||||
// testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
|
||||
// testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
|
||||
// testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
|
||||
// testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
|
||||
//
|
||||
// for (TestParameter p : testList) {
|
||||
// init();
|
||||
// clippingOp = new ClippingOp(p.inputStart, p.inputStop);
|
||||
// logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
|
||||
// ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
|
||||
// ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
|
||||
// }
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,212 +28,181 @@ package org.broadinstitute.sting.utils.clipreads;
|
|||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.TextCigarCodec;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: roger
|
||||
* Date: 9/28/11
|
||||
* Time: 9:54 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadClipperUnitTest extends BaseTest {
|
||||
|
||||
// TODO: exception testing, make cases that should fail will fail
|
||||
List<Cigar> cigarList;
|
||||
int maximumCigarSize = 6; // 6 is the minimum necessary number to try all combinations of cigar types with guarantee of clipping an element with length = 2
|
||||
|
||||
// TODO: add indels to all test cases
|
||||
|
||||
ReadClipper readClipper;
|
||||
|
||||
@BeforeMethod
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
readClipper = new ReadClipper(ClipReadsTestUtils.makeRead());
|
||||
cigarList = ClipReadsTestUtils.generateCigarList(maximumCigarSize);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipBothEndsByReferenceCoordinates() {
|
||||
|
||||
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
|
||||
//int debug = 1;
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(10, 10), new GATKSAMRecord(readClipper.read.getHeader()));
|
||||
|
||||
//clip 1 base
|
||||
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipBothEndsByReferenceCoordinates(10, 13),
|
||||
ClipReadsTestUtils.BASES.substring(1, 3).getBytes(), ClipReadsTestUtils.QUALS.substring(1, 3).getBytes(),
|
||||
"1H2M1H");
|
||||
|
||||
List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
|
||||
cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I99M1H"));
|
||||
//cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1H"));
|
||||
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1H"));
|
||||
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1H"));
|
||||
|
||||
for (CigarStringTestPair pair : cigarStringTestPairs) {
|
||||
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
|
||||
ClipReadsTestUtils.testCigar(readClipper.hardClipBothEndsByReferenceCoordinates(
|
||||
ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
|
||||
ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read)),
|
||||
pair.expected);
|
||||
}
|
||||
/*
|
||||
for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
|
||||
// The read has to be long enough to clip one base from each side
|
||||
// This also filters a lot of cigars
|
||||
if ( cigar.getReadLength() > 26 ) {
|
||||
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
|
||||
System.out.println( "Testing Cigar: "+cigar.toString() ) ;
|
||||
//cigar length reference plus soft clip
|
||||
|
||||
ClipReadsTestUtils.testBaseQual(
|
||||
readClipper.hardClipBothEndsByReferenceCoordinates(
|
||||
ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
|
||||
ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read) ),
|
||||
readClipper.read.getReadString().substring(1, (cigar.getReadLength() - 1)).getBytes(),
|
||||
readClipper.read.getBaseQualityString().substring(1, (cigar.getReadLength() - 1)).getBytes());
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
int readLength = alnStart - alnEnd;
|
||||
for (int i=0; i<readLength/2; i++) {
|
||||
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipBothEndsByReferenceCoordinates(alnStart + i, alnEnd - i);
|
||||
Assert.assertTrue(clippedRead.getAlignmentStart() >= alnStart + i, String.format("Clipped alignment start is less than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
||||
Assert.assertTrue(clippedRead.getAlignmentEnd() <= alnEnd + i, String.format("Clipped alignment end is greater than original read (minus %d): %s -> %s", i, read.getCigarString(), clippedRead.getCigarString()));
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipByReadCoordinates() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
int readLength = read.getReadLength();
|
||||
for (int i=0; i<readLength; i++) {
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReadCoordinates(0, i);
|
||||
Assert.assertTrue(clipLeft.getReadLength() <= readLength - i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipLeft.getCigarString()));
|
||||
|
||||
logger.warn("Executing testHardClipByReadCoordinates");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0, 3), new GATKSAMRecord(readClipper.read.getHeader()));
|
||||
|
||||
List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
|
||||
testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
|
||||
testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
|
||||
testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
|
||||
|
||||
for (TestParameter p : testList) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
|
||||
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
p.cigar);
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReadCoordinates(i, readLength-1);
|
||||
Assert.assertTrue(clipRight.getReadLength() <= i, String.format("Clipped read length is greater than original read length (minus %d): %s -> %s", i, read.getCigarString(), clipRight.getCigarString()));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipByReferenceCoordinates() {
|
||||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
||||
//logger.warn(debug);
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(10, 13), new GATKSAMRecord(readClipper.read.getHeader()));
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
for (int i=alnStart; i<=alnEnd; i++) {
|
||||
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
|
||||
if (!clipLeft.isEmpty())
|
||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||
}
|
||||
|
||||
List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
testList.add(new TestParameter(-1, 10, 1, 4, "1H3M"));//clip 1 base at start
|
||||
testList.add(new TestParameter(13, -1, 0, 3, "3M1H"));//clip 1 base at end
|
||||
testList.add(new TestParameter(-1, 11, 2, 4, "2H2M"));//clip 2 bases at start
|
||||
testList.add(new TestParameter(12, -1, 0, 2, "2M2H"));//clip 2 bases at end
|
||||
|
||||
for (TestParameter p : testList) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop),
|
||||
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
p.cigar);
|
||||
}
|
||||
|
||||
List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
|
||||
cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I100M"));
|
||||
//cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1M"));
|
||||
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1S"));
|
||||
cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1S"));
|
||||
|
||||
//Clips only first base
|
||||
for (CigarStringTestPair pair : cigarStringTestPairs) {
|
||||
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
|
||||
ClipReadsTestUtils.testCigar(readClipper.hardClipByReadCoordinates(0, 0), pair.expected);
|
||||
}
|
||||
/*
|
||||
for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
|
||||
// The read has to be long enough to clip one base
|
||||
// This also filters a lot of cigars
|
||||
if ( cigar.getReadLength() > 26 ) {
|
||||
readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
|
||||
System.out.println( "Testing Cigar: "+cigar.toString() ) ;
|
||||
//cigar length reference plus soft clip
|
||||
|
||||
// Clip first read
|
||||
ClipReadsTestUtils.testBaseQual(
|
||||
readClipper.hardClipByReadCoordinates(0,0),
|
||||
readClipper.read.getReadString().substring(1, cigar.getReadLength()).getBytes(),
|
||||
readClipper.read.getBaseQualityString().substring(1, cigar.getReadLength()).getBytes());
|
||||
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipByReferenceCoordinatesLeftTail() {
|
||||
init();
|
||||
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(13), new GATKSAMRecord(readClipper.read.getHeader()));
|
||||
|
||||
List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
testList.add(new TestParameter(10, -1, 1, 4, "1H3M"));//clip 1 base at start
|
||||
testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
|
||||
|
||||
for (TestParameter p : testList) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
|
||||
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
p.cigar);
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
if (ReadUtils.getRefCoordSoftUnclippedStart(read) == alnStart) { // we can't test left clipping if the read has hanging soft clips on the left side
|
||||
for (int i=alnStart; i<=alnEnd; i++) {
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(alnStart, i);
|
||||
if (!clipLeft.isEmpty())
|
||||
Assert.assertTrue(clipLeft.getAlignmentStart() >= i + 1, String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getAlignmentStart(), i + 1, read.getCigarString(), clipLeft.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipByReferenceCoordinatesRightTail() {
|
||||
init();
|
||||
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(10), new GATKSAMRecord(readClipper.read.getHeader()));
|
||||
|
||||
List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
testList.add(new TestParameter(-1, 13, 0, 3, "3M1H"));//clip 1 base at end
|
||||
testList.add(new TestParameter(-1, 12, 0, 2, "2M2H"));//clip 2 bases at end
|
||||
|
||||
for (TestParameter p : testList) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
|
||||
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
p.cigar);
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
int alnStart = read.getAlignmentStart();
|
||||
int alnEnd = read.getAlignmentEnd();
|
||||
if (ReadUtils.getRefCoordSoftUnclippedEnd(read) == alnEnd) { // we can't test right clipping if the read has hanging soft clips on the right side
|
||||
for (int i=alnStart; i<=alnEnd; i++) {
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, alnEnd);
|
||||
if (!clipRight.isEmpty() && clipRight.getAlignmentStart() <= clipRight.getAlignmentEnd()) // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
|
||||
Assert.assertTrue(clipRight.getAlignmentEnd() <= i - 1, String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getAlignmentEnd(), i - 1, read.getCigarString(), clipRight.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipLowQualEnds() {
|
||||
logger.warn("Executing testHardClipLowQualEnds");
|
||||
final byte LOW_QUAL = 2;
|
||||
final byte HIGH_QUAL = 30;
|
||||
|
||||
// Testing clipping that ends inside an insertion
|
||||
// create a read for every cigar permutation
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
int readLength = read.getReadLength();
|
||||
byte [] quals = new byte[readLength];
|
||||
|
||||
for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
|
||||
|
||||
// create a read with nLowQualBases in the left tail
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
||||
for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
|
||||
quals[addLeft] = LOW_QUAL;
|
||||
read.setBaseQualities(quals);
|
||||
GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
|
||||
|
||||
// Tests
|
||||
|
||||
// Make sure the low qualities are gone
|
||||
assertNoLowQualBases(clipLeft, LOW_QUAL);
|
||||
|
||||
// Can't run this test with the current contract of no hanging insertions
|
||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
|
||||
|
||||
// create a read with nLowQualBases in the right tail
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
||||
for (int addRight = 0; addRight < nLowQualBases; addRight++)
|
||||
quals[readLength - addRight - 1] = LOW_QUAL;
|
||||
read.setBaseQualities(quals);
|
||||
GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
|
||||
|
||||
// Tests
|
||||
|
||||
// Make sure the low qualities are gone
|
||||
assertNoLowQualBases(clipRight, LOW_QUAL);
|
||||
|
||||
// Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions
|
||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
|
||||
|
||||
// create a read with nLowQualBases in the both tails
|
||||
if (nLowQualBases <= readLength/2) {
|
||||
Utils.fillArrayWithByte(quals, HIGH_QUAL);
|
||||
for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
|
||||
quals[addBoth] = LOW_QUAL;
|
||||
quals[readLength - addBoth - 1] = LOW_QUAL;
|
||||
}
|
||||
read.setBaseQualities(quals);
|
||||
GATKSAMRecord clipBoth = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
|
||||
|
||||
// Tests
|
||||
|
||||
// Make sure the low qualities are gone
|
||||
assertNoLowQualBases(clipBoth, LOW_QUAL);
|
||||
|
||||
// Can't run this test with the current contract of no hanging insertions
|
||||
//Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
|
||||
}
|
||||
}
|
||||
// logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString()));
|
||||
}
|
||||
|
||||
// ONE OFF Testing clipping that ends inside an insertion ( Ryan's bug )
|
||||
final byte[] BASES = {'A','C','G','T','A','C','G','T'};
|
||||
final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
|
||||
final String CIGAR = "1S1M5I1S";
|
||||
|
|
@ -248,17 +217,15 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
|
||||
ReadClipper lowQualClipper = new ReadClipper(read);
|
||||
ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipSoftClippedBases() {
|
||||
|
||||
// Generate a list of cigars to test
|
||||
for (Cigar cigar : ClipReadsTestUtils.generateCigars()) {
|
||||
for (Cigar cigar : cigarList) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
readClipper = new ReadClipper(read);
|
||||
ReadClipper readClipper = new ReadClipper(read);
|
||||
GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();
|
||||
|
||||
int sumHardClips = 0;
|
||||
|
|
@ -301,7 +268,59 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));
|
||||
|
||||
|
||||
logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
|
||||
// logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
|
||||
}
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testHardClipLeadingInsertions() {
|
||||
for (Cigar cigar : cigarList) {
|
||||
if (startsWithInsertion(cigar)) {
|
||||
GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
|
||||
GATKSAMRecord clippedRead = (new ReadClipper(read)).hardClipLeadingInsertions();
|
||||
|
||||
int expectedLength = read.getReadLength() - leadingInsertionLength(read.getCigar());
|
||||
if (cigarHasElementsDifferentThanInsertionsAndHardClips(read.getCigar()))
|
||||
expectedLength -= leadingInsertionLength(ClipReadsTestUtils.invertCigar(read.getCigar()));
|
||||
|
||||
if (! clippedRead.isEmpty()) {
|
||||
Assert.assertEquals(expectedLength, clippedRead.getReadLength(), String.format("%s -> %s", read.getCigarString(), clippedRead.getCigarString())); // check that everything else is still there
|
||||
Assert.assertFalse(startsWithInsertion(clippedRead.getCigar())); // check that the insertions are gone
|
||||
}
|
||||
else
|
||||
Assert.assertTrue(expectedLength == 0, String.format("expected length: %d", expectedLength)); // check that the read was expected to be fully clipped
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
private void assertNoLowQualBases(GATKSAMRecord read, byte low_qual) {
|
||||
if (!read.isEmpty()) {
|
||||
byte [] quals = read.getBaseQualities();
|
||||
for (int i=0; i<quals.length; i++)
|
||||
Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
|
||||
}
|
||||
}
|
||||
|
||||
private boolean startsWithInsertion(Cigar cigar) {
|
||||
return leadingInsertionLength(cigar) > 0;
|
||||
}
|
||||
|
||||
private int leadingInsertionLength(Cigar cigar) {
|
||||
for (CigarElement cigarElement : cigar.getCigarElements()) {
|
||||
if (cigarElement.getOperator() == CigarOperator.INSERTION)
|
||||
return cigarElement.getLength();
|
||||
if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
private boolean cigarHasElementsDifferentThanInsertionsAndHardClips (Cigar cigar) {
|
||||
for (CigarElement cigarElement : cigar.getCigarElements())
|
||||
if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.clipreads;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: roger
|
||||
* Date: 11/28/11
|
||||
* Time: 4:07 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class TestParameter {
|
||||
int inputStart;
|
||||
int inputStop;
|
||||
int substringStart;
|
||||
int substringStop;
|
||||
String cigar;
|
||||
|
||||
public TestParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
|
||||
inputStart = InputStart;
|
||||
inputStop = InputStop;
|
||||
substringStart = SubstringStart;
|
||||
substringStop = SubstringStop;
|
||||
cigar = Cigar;
|
||||
}
|
||||
}
|
||||
|
|
@ -19,7 +19,7 @@ class ExampleCountLoci extends QScript {
|
|||
@Output
|
||||
var out: File = _
|
||||
|
||||
def script = {
|
||||
def script() {
|
||||
val countLoci = new CountLoci
|
||||
countLoci.reference_sequence = referenceFile
|
||||
countLoci.input_file = bamFiles
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class ExampleCountReads extends QScript {
|
|||
/**
|
||||
* In script, you create and then add() functions to the pipeline.
|
||||
*/
|
||||
def script = {
|
||||
def script() {
|
||||
|
||||
// Run CountReads for all bams jointly.
|
||||
|
||||
|
|
@ -41,6 +41,9 @@ class ExampleCountReads extends QScript {
|
|||
// matches the full form of the argument, but will actually be a scala List[]
|
||||
jointCountReads.input_file = bamFiles
|
||||
|
||||
// Set the memory limit. Also acts as a memory request on LSF and GridEngine.
|
||||
jointCountReads.memoryLimit = 1
|
||||
|
||||
// Add the newly created function to the pipeline.
|
||||
add(jointCountReads)
|
||||
|
||||
|
|
@ -51,6 +54,7 @@ class ExampleCountReads extends QScript {
|
|||
singleCountReads.reference_sequence = referenceFile
|
||||
// ':+' is the scala List append operator
|
||||
singleCountReads.input_file :+= bamFile
|
||||
singleCountReads.memoryLimit = 1
|
||||
add(singleCountReads)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class ExampleCustomWalker extends QScript {
|
|||
/**
|
||||
* In script, you create and then add() functions to the pipeline.
|
||||
*/
|
||||
def script = {
|
||||
def script() {
|
||||
val customWalker = new CommandLineGATK {
|
||||
// Set the name of your walker, for example this will be passed as -T MyCustomWalker
|
||||
this.analysis_type = "MyCustomWalker"
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ class ExampleUnifiedGenotyper extends QScript {
|
|||
@Argument(doc="An optional list of filter expressions.", shortName="filterExpression", required=false)
|
||||
var filterExpressions: List[String] = Nil
|
||||
|
||||
|
||||
// This trait allows us set the variables below in one place,
|
||||
// and then reuse this trait on each CommandLineGATK function below.
|
||||
trait UnifiedGenotyperArguments extends CommandLineGATK {
|
||||
|
|
|
|||
|
|
@ -89,7 +89,7 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
private var shuttingDown = false
|
||||
|
||||
private lazy val pluginManager = {
|
||||
qScriptClasses = IOUtils.tempDir("Q-Classes", "", settings.qSettings.tempDirectory)
|
||||
qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory)
|
||||
qScriptManager.loadScripts(scripts, qScriptClasses)
|
||||
new PluginManager[QScript](classOf[QScript], List(qScriptClasses.toURI.toURL))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -81,6 +81,10 @@ trait QJobReport extends Logging {
|
|||
this.reportFeatures = features.mapValues(_.toString)
|
||||
}
|
||||
|
||||
def addJobReportBinding(key: String, value: Any) {
|
||||
this.reportFeatures += (key -> value.toString)
|
||||
}
|
||||
|
||||
// copy the QJobReport information -- todo : what's the best way to do this?
|
||||
override def copySettingsTo(function: QFunction) {
|
||||
self.copySettingsTo(function)
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.BaseTest
|
|||
|
||||
class ExampleCountLociPipelineTest {
|
||||
@Test
|
||||
def testCountLoci {
|
||||
def testCountLoci() {
|
||||
val testOut = "count.out"
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "countloci"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.pipeline.examples
|
||||
|
||||
import org.testng.annotations.Test
|
||||
import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
||||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class ExampleCountReadsPipelineTest {
|
||||
@Test
|
||||
def testCountReads() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "countreads"
|
||||
spec.args = Array(
|
||||
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala",
|
||||
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||
" -I " + BaseTest.testDir + "exampleBAM.bam").mkString
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.pipeline.examples
|
||||
|
||||
import org.testng.annotations.Test
|
||||
import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
|
||||
import org.broadinstitute.sting.BaseTest
|
||||
|
||||
class ExampleUnifiedGenotyperPipelineTest {
|
||||
@Test
|
||||
def testUnifiedGenotyper() {
|
||||
val spec = new PipelineTestSpec
|
||||
spec.name = "unifiedgenotyper"
|
||||
spec.args = Array(
|
||||
" -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala",
|
||||
" -R " + BaseTest.testDir + "exampleFASTA.fasta",
|
||||
" -I " + BaseTest.testDir + "exampleBAM.bam",
|
||||
" -filter QD",
|
||||
" -filterExpression 'QD < 2.0'").mkString
|
||||
PipelineTest.executeTest(spec)
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue