Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
8e23c98dd9
11
build.xml
11
build.xml
|
|
@ -1,5 +1,5 @@
|
|||
<!--
|
||||
~ Copyright (c) 2011, The Broad Institute
|
||||
~ Copyright (c) 2012, The Broad Institute
|
||||
~
|
||||
~ Permission is hereby granted, free of charge, to any person
|
||||
~ obtaining a copy of this software and associated documentation
|
||||
|
|
@ -1210,7 +1210,14 @@
|
|||
<!-- Build gsalib R module -->
|
||||
<target name="gsalib">
|
||||
<exec executable="R" failonerror="true">
|
||||
<arg line="R CMD INSTALL -l ${R.library.dir} ${R.public.src.dir}/${R.package.path}/gsalib" />
|
||||
<arg line="R CMD INSTALL --preclean ${R.public.src.dir}/${R.package.path}/gsalib" />
|
||||
</exec>
|
||||
</target>
|
||||
|
||||
<target name="clean.gsalib">
|
||||
<!-- Currently not cleaning out the lib during 'ant clean' -->
|
||||
<exec executable="R" failonerror="false">
|
||||
<arg line="R CMD REMOVE gsalib" />
|
||||
</exec>
|
||||
</target>
|
||||
</project>
|
||||
|
|
|
|||
26
ivy.xml
26
ivy.xml
|
|
@ -1,3 +1,26 @@
|
|||
<!--
|
||||
~ Copyright (c) 2012, The Broad Institute
|
||||
~
|
||||
~ Permission is hereby granted, free of charge, to any person
|
||||
~ obtaining a copy of this software and associated documentation
|
||||
~ files (the "Software"), to deal in the Software without
|
||||
~ restriction, including without limitation the rights to use,
|
||||
~ copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
~ copies of the Software, and to permit persons to whom the
|
||||
~ Software is furnished to do so, subject to the following
|
||||
~ conditions:
|
||||
~
|
||||
~ The above copyright notice and this permission notice shall be
|
||||
~ included in all copies or substantial portions of the Software.
|
||||
~ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
~ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
~ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
~ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
~ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
~ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
~ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
~ OTHER DEALINGS IN THE SOFTWARE.
|
||||
-->
|
||||
|
||||
<ivy-module version="1.0">
|
||||
<info organisation="org.broadinstitute" module="Sting"/>
|
||||
|
|
@ -21,7 +44,6 @@
|
|||
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
||||
<dependency org="org.apache.bcel" name="bcel" rev="5.2"/>
|
||||
<dependency org="org.yaml" name="snakeyaml" rev="1.7"/>
|
||||
|
||||
<!-- Dependencies for reflections mvn repository -->
|
||||
<dependency org="org.reflections" name="reflections" rev="0.9.5-RC2"/>
|
||||
|
|
@ -40,7 +62,7 @@
|
|||
<dependency org="org.apache.commons" name="commons-jexl" rev="2.0"/>
|
||||
<dependency org="commons-lang" name="commons-lang" rev="2.5"/>
|
||||
<dependency org="commons-logging" name="commons-logging" rev="1.1.1"/>
|
||||
<dependency org="commons-io" name="commons-io" rev="2.0"/>
|
||||
<dependency org="commons-io" name="commons-io" rev="2.1"/>
|
||||
<dependency org="org.apache.commons" name="commons-math" rev="2.2" />
|
||||
|
||||
<!-- Lucene core utilities -->
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
library(gsalib)
|
||||
require("ggplot2")
|
||||
require("gplots")
|
||||
library(ggplot2)
|
||||
library(gplots)
|
||||
library(tools)
|
||||
|
||||
#
|
||||
# Standard command line switch. Can we loaded interactively for development
|
||||
|
|
@ -201,4 +202,7 @@ for ( group in gatkReportData ) {
|
|||
|
||||
if ( ! is.na(outputPDF) ) {
|
||||
dev.off()
|
||||
}
|
||||
if (exists("compactPDF")) {
|
||||
compactPDF(outputPDF)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,247 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
package net.sf.picard.sam;
|
||||
|
||||
import net.sf.picard.PicardException;
|
||||
|
||||
import java.util.*;
|
||||
import java.lang.reflect.Constructor;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
|
||||
/**
|
||||
* Provides an iterator interface for merging multiple underlying iterators into a single
|
||||
* iterable stream. The underlying iterators/files must all have the same sort order unless
|
||||
* the requested output format is unsorted, in which case any combination is valid.
|
||||
*/
|
||||
public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
|
||||
private final PriorityQueue<ComparableSamRecordIterator> pq;
|
||||
private final SamFileHeaderMerger samHeaderMerger;
|
||||
private final Collection<SAMFileReader> readers;
|
||||
private final SAMFileHeader.SortOrder sortOrder;
|
||||
private final SAMRecordComparator comparator;
|
||||
|
||||
private boolean initialized = false;
|
||||
private boolean iterationStarted = false;
|
||||
|
||||
/**
|
||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||
* provided by the header merger parameter.
|
||||
* @param headerMerger The merged header and contents of readers.
|
||||
* @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order.
|
||||
* @deprecated replaced by (SamFileHeaderMerger, Collection<SAMFileReader>, boolean)
|
||||
*/
|
||||
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) {
|
||||
this(headerMerger, headerMerger.getReaders(), forcePresorted);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new merging iterator with the same set of readers and sort order as
|
||||
* provided by the header merger parameter.
|
||||
* @param headerMerger The merged header and contents of readers.
|
||||
* @param assumeSorted false ensures that the iterator checks the headers of the readers for appropriate sort order.
|
||||
*/
|
||||
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, Collection<SAMFileReader> readers, final boolean assumeSorted) {
|
||||
this.samHeaderMerger = headerMerger;
|
||||
this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
|
||||
this.comparator = getComparator();
|
||||
this.readers = readers;
|
||||
|
||||
this.pq = new PriorityQueue<ComparableSamRecordIterator>(readers.size());
|
||||
|
||||
for (final SAMFileReader reader : readers) {
|
||||
if (!assumeSorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted &&
|
||||
reader.getFileHeader().getSortOrder() != this.sortOrder){
|
||||
throw new PicardException("Files are not compatible with sort order");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a given SAM file iterator to the merging iterator. Use this to restrict the merged iteration to a given genomic interval,
|
||||
* rather than iterating over every read in the backing file or stream.
|
||||
* @param reader Reader to add to the merging iterator.
|
||||
* @param iterator Iterator traversing over reader contents.
|
||||
*/
|
||||
public void addIterator(final SAMFileReader reader, final CloseableIterator<SAMRecord> iterator) {
|
||||
if(iterationStarted)
|
||||
throw new PicardException("Cannot add another iterator; iteration has already begun");
|
||||
if(!samHeaderMerger.containsHeader(reader.getFileHeader()))
|
||||
throw new PicardException("All iterators to be merged must be accounted for in the SAM header merger");
|
||||
final ComparableSamRecordIterator comparableIterator = new ComparableSamRecordIterator(reader,iterator,comparator);
|
||||
addIfNotEmpty(comparableIterator);
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
private void startIterationIfRequired() {
|
||||
if(initialized)
|
||||
return;
|
||||
for(SAMFileReader reader: readers)
|
||||
addIterator(reader,reader.iterator());
|
||||
iterationStarted = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Close down all open iterators.
|
||||
*/
|
||||
public void close() {
|
||||
// Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue.
|
||||
for(CloseableIterator<SAMRecord> iterator: pq)
|
||||
iterator.close();
|
||||
}
|
||||
|
||||
/** Returns true if any of the underlying iterators has more records, otherwise false. */
|
||||
public boolean hasNext() {
|
||||
startIterationIfRequired();
|
||||
return !this.pq.isEmpty();
|
||||
}
|
||||
|
||||
/** Returns the next record from the top most iterator during merging. */
|
||||
public SAMRecord next() {
|
||||
startIterationIfRequired();
|
||||
|
||||
final ComparableSamRecordIterator iterator = this.pq.poll();
|
||||
final SAMRecord record = iterator.next();
|
||||
addIfNotEmpty(iterator);
|
||||
record.setHeader(this.samHeaderMerger.getMergedHeader());
|
||||
|
||||
// Fix the read group if needs be
|
||||
if (this.samHeaderMerger.hasReadGroupCollisions()) {
|
||||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID);
|
||||
if (oldGroupId != null ) {
|
||||
final String newGroupId = this.samHeaderMerger.getReadGroupId(iterator.getReader().getFileHeader(),oldGroupId);
|
||||
record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId);
|
||||
}
|
||||
}
|
||||
|
||||
// Fix the program group if needs be
|
||||
if (this.samHeaderMerger.hasProgramGroupCollisions()) {
|
||||
final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID);
|
||||
if (oldGroupId != null ) {
|
||||
final String newGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader().getFileHeader(),oldGroupId);
|
||||
record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId);
|
||||
}
|
||||
}
|
||||
|
||||
// Fix up the sequence indexes if needs be
|
||||
if (this.samHeaderMerger.hasMergedSequenceDictionary()) {
|
||||
if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getReferenceIndex()));
|
||||
}
|
||||
|
||||
if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getMateReferenceIndex()));
|
||||
}
|
||||
}
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds iterator to priority queue. If the iterator has more records it is added
|
||||
* otherwise it is closed and not added.
|
||||
*/
|
||||
private void addIfNotEmpty(final ComparableSamRecordIterator iterator) {
|
||||
if (iterator.hasNext()) {
|
||||
pq.offer(iterator);
|
||||
}
|
||||
else {
|
||||
iterator.close();
|
||||
}
|
||||
}
|
||||
|
||||
/** Unsupported operation. */
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the right comparator for a given sort order (coordinate, alphabetic). In the
|
||||
* case of "unsorted" it will return a comparator that gives an arbitrary but reflexive
|
||||
* ordering.
|
||||
*/
|
||||
private SAMRecordComparator getComparator() {
|
||||
// For unsorted build a fake comparator that compares based on object ID
|
||||
if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
|
||||
return new SAMRecordComparator() {
|
||||
public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||
return System.identityHashCode(lhs) - System.identityHashCode(rhs);
|
||||
}
|
||||
|
||||
public int compare(final SAMRecord lhs, final SAMRecord rhs) {
|
||||
return fileOrderCompare(lhs, rhs);
|
||||
}
|
||||
};
|
||||
}
|
||||
if (samHeaderMerger.hasMergedSequenceDictionary() && sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) {
|
||||
return new MergedSequenceDictionaryCoordinateOrderComparator();
|
||||
}
|
||||
|
||||
// Otherwise try and figure out what kind of comparator to return and build it
|
||||
return this.sortOrder.getComparatorInstance();
|
||||
}
|
||||
|
||||
/** Returns the merged header that the merging iterator is working from. */
|
||||
public SAMFileHeader getMergedHeader() {
|
||||
return this.samHeaderMerger.getMergedHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Ugh. Basically does a regular coordinate compare, but looks up the sequence indices in the merged
|
||||
* sequence dictionary. I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids
|
||||
* more copy & paste.
|
||||
*/
|
||||
private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator {
|
||||
|
||||
public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
|
||||
final int referenceIndex1 = getReferenceIndex(samRecord1);
|
||||
final int referenceIndex2 = getReferenceIndex(samRecord2);
|
||||
if (referenceIndex1 != referenceIndex2) {
|
||||
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return 1;
|
||||
} else if (referenceIndex2 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return -1;
|
||||
} else {
|
||||
return referenceIndex1 - referenceIndex2;
|
||||
}
|
||||
}
|
||||
if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
// Both are unmapped.
|
||||
return 0;
|
||||
}
|
||||
return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart();
|
||||
}
|
||||
|
||||
private int getReferenceIndex(final SAMRecord samRecord) {
|
||||
if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getReferenceIndex());
|
||||
}
|
||||
if (samRecord.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
|
||||
return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getMateReferenceIndex());
|
||||
}
|
||||
return SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,744 +0,0 @@
|
|||
/*
|
||||
* The MIT License
|
||||
*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*/
|
||||
package net.sf.picard.sam;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.picard.PicardException;
|
||||
import net.sf.samtools.AbstractSAMHeaderRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMProgramRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
|
||||
/**
|
||||
* Merges SAMFileHeaders that have the same sequences into a single merged header
|
||||
* object while providing read group translation for cases where read groups
|
||||
* clash across input headers.
|
||||
*/
|
||||
public class SamFileHeaderMerger {
|
||||
//Super Header to construct
|
||||
private final SAMFileHeader mergedHeader;
|
||||
private Collection<SAMFileReader> readers;
|
||||
private final Collection<SAMFileHeader> headers;
|
||||
|
||||
//Translation of old group ids to new group ids
|
||||
private final Map<SAMFileHeader, Map<String, String>> samReadGroupIdTranslation =
|
||||
new IdentityHashMap<SAMFileHeader, Map<String, String>>();
|
||||
|
||||
//the read groups from different files use the same group ids
|
||||
private boolean hasReadGroupCollisions = false;
|
||||
|
||||
//the program records from different files use the same program record ids
|
||||
private boolean hasProgramGroupCollisions = false;
|
||||
|
||||
//Translation of old program group ids to new program group ids
|
||||
private Map<SAMFileHeader, Map<String, String>> samProgramGroupIdTranslation =
|
||||
new IdentityHashMap<SAMFileHeader, Map<String, String>>();
|
||||
|
||||
private boolean hasMergedSequenceDictionary = false;
|
||||
|
||||
// Translation of old sequence dictionary ids to new dictionary ids
|
||||
// This is an IdentityHashMap because it can be quite expensive to compute the hashCode for
|
||||
// large SAMFileHeaders. It is possible that two input files will have identical headers so that
|
||||
// the regular HashMap would fold them together, but the value stored in each of the two
|
||||
// Map entries will be the same, so it should not hurt anything.
|
||||
private final Map<SAMFileHeader, Map<Integer, Integer>> samSeqDictionaryIdTranslationViaHeader =
|
||||
new IdentityHashMap<SAMFileHeader, Map<Integer, Integer>>();
|
||||
|
||||
//HeaderRecordFactory that creates SAMReadGroupRecord instances.
|
||||
private static final HeaderRecordFactory<SAMReadGroupRecord> READ_GROUP_RECORD_FACTORY = new HeaderRecordFactory<SAMReadGroupRecord>() {
|
||||
public SAMReadGroupRecord createRecord(String id, SAMReadGroupRecord srcReadGroupRecord) {
|
||||
return new SAMReadGroupRecord(id, srcReadGroupRecord);
|
||||
}
|
||||
};
|
||||
|
||||
//HeaderRecordFactory that creates SAMProgramRecord instances.
|
||||
private static final HeaderRecordFactory<SAMProgramRecord> PROGRAM_RECORD_FACTORY = new HeaderRecordFactory<SAMProgramRecord>() {
|
||||
public SAMProgramRecord createRecord(String id, SAMProgramRecord srcProgramRecord) {
|
||||
return new SAMProgramRecord(id, srcProgramRecord);
|
||||
}
|
||||
};
|
||||
|
||||
//comparator used to sort lists of program group and read group records
|
||||
private static final Comparator<AbstractSAMHeaderRecord> RECORD_ID_COMPARATOR = new Comparator<AbstractSAMHeaderRecord>() {
|
||||
public int compare(AbstractSAMHeaderRecord o1, AbstractSAMHeaderRecord o2) {
|
||||
return o1.getId().compareTo(o2.getId());
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Create SAMFileHeader with additional information. Required that sequence dictionaries agree.
|
||||
*
|
||||
* @param readers sam file readers to combine
|
||||
* @param sortOrder sort order new header should have
|
||||
* @deprecated replaced by SamFileHeaderMerger(Collection<SAMFileHeader>, SAMFileHeader.SortOrder, boolean)
|
||||
*/
|
||||
public SamFileHeaderMerger(final Collection<SAMFileReader> readers, final SAMFileHeader.SortOrder sortOrder) {
|
||||
this(readers, sortOrder, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create SAMFileHeader with additional information.
|
||||
*
|
||||
* @param readers sam file readers to combine
|
||||
* @param sortOrder sort order new header should have
|
||||
* @param mergeDictionaries If true, merge sequence dictionaries in new header. If false, require that
|
||||
* all input sequence dictionaries be identical.
|
||||
* @deprecated replaced by SamFileHeaderMerger(Collection<SAMFileHeader>, SAMFileHeader.SortOrder, boolean)
|
||||
*/
|
||||
public SamFileHeaderMerger(final Collection<SAMFileReader> readers, final SAMFileHeader.SortOrder sortOrder, final boolean mergeDictionaries) {
|
||||
this(sortOrder, getHeadersFromReaders(readers), mergeDictionaries);
|
||||
this.readers = readers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create SAMFileHeader with additional information.. This is the preferred constructor.
|
||||
*
|
||||
* @param sortOrder sort order new header should have
|
||||
* @param headers sam file headers to combine
|
||||
* @param mergeDictionaries If true, merge sequence dictionaries in new header. If false, require that
|
||||
* all input sequence dictionaries be identical.
|
||||
*/
|
||||
public SamFileHeaderMerger(final SAMFileHeader.SortOrder sortOrder, final Collection<SAMFileHeader> headers, final boolean mergeDictionaries) {
|
||||
this.headers = headers;
|
||||
this.mergedHeader = new SAMFileHeader();
|
||||
|
||||
SAMSequenceDictionary sequenceDictionary;
|
||||
try {
|
||||
sequenceDictionary = getSequenceDictionary(headers);
|
||||
this.hasMergedSequenceDictionary = false;
|
||||
}
|
||||
catch (SequenceUtil.SequenceListsDifferException pe) {
|
||||
if (mergeDictionaries) {
|
||||
sequenceDictionary = mergeSequenceDictionaries(headers);
|
||||
this.hasMergedSequenceDictionary = true;
|
||||
}
|
||||
else {
|
||||
throw pe;
|
||||
}
|
||||
}
|
||||
|
||||
this.mergedHeader.setSequenceDictionary(sequenceDictionary);
|
||||
|
||||
// Set program that creates input alignments
|
||||
for (final SAMProgramRecord program : mergeProgramGroups(headers)) {
|
||||
this.mergedHeader.addProgramRecord(program);
|
||||
}
|
||||
|
||||
// Set read groups for merged header
|
||||
final List<SAMReadGroupRecord> readGroups = mergeReadGroups(headers);
|
||||
this.mergedHeader.setReadGroups(readGroups);
|
||||
this.mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.none);
|
||||
|
||||
this.mergedHeader.setSortOrder(sortOrder);
|
||||
|
||||
for (final SAMFileHeader header : headers) {
|
||||
for (final String comment : header.getComments()) {
|
||||
this.mergedHeader.addComment(comment);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Utilility method to make use with old constructor
|
||||
private static List<SAMFileHeader> getHeadersFromReaders(Collection<SAMFileReader> readers) {
|
||||
List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
|
||||
for (SAMFileReader reader : readers) {
|
||||
headers.add(reader.getFileHeader());
|
||||
}
|
||||
return headers;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks to see if there are clashes where different readers are using the same read
|
||||
* group IDs. If yes, then those IDs that collided are remapped.
|
||||
*
|
||||
* @param headers headers to combine
|
||||
* @return new list of read groups constructed from all the readers
|
||||
*/
|
||||
private List<SAMReadGroupRecord> mergeReadGroups(final Collection<SAMFileHeader> headers) {
|
||||
//prepare args for mergeHeaderRecords(..) call
|
||||
final HashSet<String> idsThatAreAlreadyTaken = new HashSet<String>();
|
||||
|
||||
final List<HeaderRecordAndFileHeader<SAMReadGroupRecord>> readGroupsToProcess = new LinkedList<HeaderRecordAndFileHeader<SAMReadGroupRecord>>();
|
||||
for (final SAMFileHeader header : headers) {
|
||||
for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
|
||||
//verify that there are no existing id collisions in this input file
|
||||
if(!idsThatAreAlreadyTaken.add(readGroup.getId()))
|
||||
throw new PicardException("Input file: " + header + " contains more than one RG with the same id (" + readGroup.getId() + ")");
|
||||
|
||||
readGroupsToProcess.add(new HeaderRecordAndFileHeader<SAMReadGroupRecord>(readGroup, header));
|
||||
}
|
||||
idsThatAreAlreadyTaken.clear();
|
||||
}
|
||||
|
||||
final List<SAMReadGroupRecord> result = new LinkedList<SAMReadGroupRecord>();
|
||||
|
||||
hasReadGroupCollisions = mergeHeaderRecords(readGroupsToProcess, READ_GROUP_RECORD_FACTORY, idsThatAreAlreadyTaken, samReadGroupIdTranslation, result);
|
||||
|
||||
//sort the result list by record id
|
||||
Collections.sort(result, RECORD_ID_COMPARATOR);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks to see if there are clashes where different readers are using the same program
|
||||
* group IDs. If yes, then those IDs that collided are remapped.
|
||||
*
|
||||
* @param headers headers to combine
|
||||
* @return new list of program groups constructed from all the readers
|
||||
*/
|
||||
private List<SAMProgramRecord> mergeProgramGroups(final Collection<SAMFileHeader> headers) {
|
||||
|
||||
final List<SAMProgramRecord> overallResult = new LinkedList<SAMProgramRecord>();
|
||||
|
||||
//this Set will accumulate all SAMProgramRecord ids that have been encountered so far.
|
||||
final HashSet<String> idsThatAreAlreadyTaken = new HashSet<String>();
|
||||
|
||||
//need to process all program groups
|
||||
List<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsLeftToProcess = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
|
||||
for (final SAMFileHeader header : headers) {
|
||||
for (final SAMProgramRecord programGroup : header.getProgramRecords()) {
|
||||
//verify that there are no existing id collisions in this input file
|
||||
if(!idsThatAreAlreadyTaken.add(programGroup.getId()))
|
||||
throw new PicardException("Input file: " + header + " contains more than one PG with the same id (" + programGroup.getId() + ")");
|
||||
|
||||
programGroupsLeftToProcess.add(new HeaderRecordAndFileHeader<SAMProgramRecord>(programGroup, header));
|
||||
}
|
||||
idsThatAreAlreadyTaken.clear();
|
||||
}
|
||||
|
||||
//A program group header (lets say ID=2 PN=B PP=1) may have a PP (previous program) attribute which chains it to
|
||||
//another program group header (lets say ID=1 PN=A) to indicate that the given file was
|
||||
//processed by program A followed by program B. These PP attributes potentially
|
||||
//connect headers into one or more tree structures. Merging is done by
|
||||
//first merging all headers that don't have PP attributes (eg. tree roots),
|
||||
//then updating and merging all headers whose PPs point to the tree-root headers,
|
||||
//and so on until all program group headers are processed.
|
||||
|
||||
//currentProgramGroups is the list of records to merge next. Start by merging the programGroups that don't have a PP attribute (eg. the tree roots).
|
||||
List< HeaderRecordAndFileHeader<SAMProgramRecord> > currentProgramGroups = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
|
||||
for(final Iterator<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
|
||||
final HeaderRecordAndFileHeader<SAMProgramRecord> pair = programGroupsLeftToProcessIterator.next();
|
||||
if(pair.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG) == null) {
|
||||
programGroupsLeftToProcessIterator.remove();
|
||||
currentProgramGroups.add(pair);
|
||||
}
|
||||
}
|
||||
|
||||
//merge currentProgramGroups
|
||||
while(!currentProgramGroups.isEmpty())
|
||||
{
|
||||
final List<SAMProgramRecord> currentResult = new LinkedList<SAMProgramRecord>();
|
||||
|
||||
hasProgramGroupCollisions |= mergeHeaderRecords(currentProgramGroups, PROGRAM_RECORD_FACTORY, idsThatAreAlreadyTaken, samProgramGroupIdTranslation, currentResult);
|
||||
|
||||
//add currentResults to overallResults
|
||||
overallResult.addAll(currentResult);
|
||||
|
||||
//apply the newly-computed id translations to currentProgramGroups and programGroupsLeftToProcess
|
||||
currentProgramGroups = translateIds(currentProgramGroups, samProgramGroupIdTranslation, false);
|
||||
programGroupsLeftToProcess = translateIds(programGroupsLeftToProcess, samProgramGroupIdTranslation, true);
|
||||
|
||||
//find all records in programGroupsLeftToProcess whose ppId points to a record that was just processed (eg. a record that's in currentProgramGroups),
|
||||
//and move them to the list of programGroupsToProcessNext.
|
||||
LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsToProcessNext = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
|
||||
for(final Iterator<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
|
||||
final HeaderRecordAndFileHeader<SAMProgramRecord> pairLeftToProcess = programGroupsLeftToProcessIterator.next();
|
||||
final Object ppIdOfRecordLeftToProcess = pairLeftToProcess.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
|
||||
//find what currentProgramGroups this ppId points to (NOTE: they have to come from the same file)
|
||||
for(final HeaderRecordAndFileHeader<SAMProgramRecord> justProcessedPair : currentProgramGroups) {
|
||||
String idJustProcessed = justProcessedPair.getHeaderRecord().getId();
|
||||
if(pairLeftToProcess.getFileHeader() == justProcessedPair.getFileHeader() && ppIdOfRecordLeftToProcess.equals(idJustProcessed)) {
|
||||
programGroupsLeftToProcessIterator.remove();
|
||||
programGroupsToProcessNext.add(pairLeftToProcess);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
currentProgramGroups = programGroupsToProcessNext;
|
||||
}
|
||||
|
||||
//verify that all records were processed
|
||||
if(!programGroupsLeftToProcess.isEmpty()) {
|
||||
StringBuffer errorMsg = new StringBuffer(programGroupsLeftToProcess.size() + " program groups weren't processed. Do their PP ids point to existing PGs? \n");
|
||||
for( final HeaderRecordAndFileHeader<SAMProgramRecord> pair : programGroupsLeftToProcess ) {
|
||||
SAMProgramRecord record = pair.getHeaderRecord();
|
||||
errorMsg.append("@PG ID:"+record.getProgramGroupId()+" PN:"+record.getProgramName()+" PP:"+record.getPreviousProgramGroupId() +"\n");
|
||||
}
|
||||
throw new PicardException(errorMsg.toString());
|
||||
}
|
||||
|
||||
//sort the result list by record id
|
||||
Collections.sort(overallResult, RECORD_ID_COMPARATOR);
|
||||
|
||||
return overallResult;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Utility method that takes a list of program groups and remaps all their
|
||||
* ids (including ppIds if requested) using the given idTranslationTable.
|
||||
*
|
||||
* NOTE: when remapping, this method creates new SAMProgramRecords and
|
||||
* doesn't mutate any records in the programGroups list.
|
||||
*
|
||||
* @param programGroups The program groups to translate.
|
||||
* @param idTranslationTable The translation table.
|
||||
* @param translatePpIds Whether ppIds should be translated as well.
|
||||
*
|
||||
* @return The list of translated records.
|
||||
*/
|
||||
private List<HeaderRecordAndFileHeader<SAMProgramRecord>> translateIds(
|
||||
List<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroups,
|
||||
Map<SAMFileHeader, Map<String, String>> idTranslationTable,
|
||||
boolean translatePpIds) {
|
||||
|
||||
//go through programGroups and translate any IDs and PPs based on the idTranslationTable.
|
||||
List<HeaderRecordAndFileHeader<SAMProgramRecord>> result = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
|
||||
for(final HeaderRecordAndFileHeader<SAMProgramRecord> pair : programGroups ) {
|
||||
final SAMProgramRecord record = pair.getHeaderRecord();
|
||||
final String id = record.getProgramGroupId();
|
||||
final String ppId = (String) record.getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
|
||||
|
||||
final SAMFileHeader header = pair.getFileHeader();
|
||||
final Map<String, String> translations = idTranslationTable.get(header);
|
||||
|
||||
//see if one or both ids need to be translated
|
||||
SAMProgramRecord translatedRecord = null;
|
||||
if(translations != null)
|
||||
{
|
||||
String translatedId = translations.get( id );
|
||||
String translatedPpId = translatePpIds ? translations.get( ppId ) : null;
|
||||
|
||||
boolean needToTranslateId = translatedId != null && !translatedId.equals(id);
|
||||
boolean needToTranslatePpId = translatedPpId != null && !translatedPpId.equals(ppId);
|
||||
|
||||
if(needToTranslateId && needToTranslatePpId) {
|
||||
translatedRecord = new SAMProgramRecord(translatedId, record);
|
||||
translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
|
||||
} else if(needToTranslateId) {
|
||||
translatedRecord = new SAMProgramRecord(translatedId, record);
|
||||
} else if(needToTranslatePpId) {
|
||||
translatedRecord = new SAMProgramRecord(id, record);
|
||||
translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
|
||||
}
|
||||
}
|
||||
|
||||
if(translatedRecord != null) {
|
||||
result.add(new HeaderRecordAndFileHeader<SAMProgramRecord>(translatedRecord, header));
|
||||
} else {
|
||||
result.add(pair); //keep the original record
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Utility method for merging a List of AbstractSAMHeaderRecords. If it finds
 * records that have identical ids and attributes, it will collapse them
 * into one record. If it finds records that have identical ids but
 * non-identical attributes, this is treated as a collision. When collision happens,
 * the records' ids are remapped, and an old-id to new-id mapping is added to the idTranslationTable.
 *
 * NOTE: Non-collided records also get recorded in the idTranslationTable as
 * old-id to old-id. This way, an idTranslationTable lookup should never return null.
 *
 * @param headerRecords The header records to merge.
 * @param headerRecordFactory Constructs a specific subclass of AbstractSAMHeaderRecord.
 * @param idsThatAreAlreadyTaken If the id of a headerRecord matches an id in this set, it will be treated as a collision, and the headerRecord's id will be remapped.
 * @param idTranslationTable When records collide, their ids are remapped, and an old-id to new-id
 *   mapping is added to the idTranslationTable. Non-collided records also get recorded in the idTranslationTable as
 *   old-id to old-id. This way, an idTranslationTable lookup should never return null.
 *
 * @param result The list of merged header records (output parameter, appended to).
 *
 * @return True if there were collisions.
 */
private <RecordType extends AbstractSAMHeaderRecord> boolean mergeHeaderRecords(final List<HeaderRecordAndFileHeader<RecordType>> headerRecords, HeaderRecordFactory<RecordType> headerRecordFactory,
        final HashSet<String> idsThatAreAlreadyTaken, Map<SAMFileHeader, Map<String, String>> idTranslationTable, List<RecordType> result) {

    //The outer Map bins the header records by their ids. The nested Map further collapses
    //header records which, in addition to having the same id, also have identical attributes.
    //In other words, each key in the nested map represents one or more
    //header records which have both identical ids and identical attributes. The List of
    //SAMFileHeaders keeps track of which readers these header record(s) came from.
    final Map<String, Map<RecordType, List<SAMFileHeader>>> idToRecord =
            new HashMap<String, Map<RecordType, List<SAMFileHeader>>>();

    //Populate the idToRecord data structure
    for (final HeaderRecordAndFileHeader<RecordType> pair : headerRecords) {
        final RecordType record = pair.getHeaderRecord();
        final SAMFileHeader header = pair.getFileHeader();
        final String recordId = record.getId();
        //Bin by id first; LinkedHashMap preserves first-seen order of distinct records.
        Map<RecordType, List<SAMFileHeader>> recordsWithSameId = idToRecord.get(recordId);
        if(recordsWithSameId == null) {
            recordsWithSameId = new LinkedHashMap<RecordType, List<SAMFileHeader>>();
            idToRecord.put(recordId, recordsWithSameId);
        }

        //Records with identical attributes collapse onto the same map key.
        List<SAMFileHeader> fileHeaders = recordsWithSameId.get(record);
        if(fileHeaders == null) {
            fileHeaders = new LinkedList<SAMFileHeader>();
            recordsWithSameId.put(record, fileHeaders);
        }

        fileHeaders.add(header);
    }

    //Resolve any collisions between header records by remapping their ids.
    boolean hasCollisions = false;
    for (final Map.Entry<String, Map<RecordType, List<SAMFileHeader>>> entry : idToRecord.entrySet() )
    {
        final String recordId = entry.getKey();
        final Map<RecordType, List<SAMFileHeader>> recordsWithSameId = entry.getValue();


        for( Map.Entry<RecordType, List<SAMFileHeader>> recordWithUniqueAttr : recordsWithSameId.entrySet()) {
            final RecordType record = recordWithUniqueAttr.getKey();
            final List<SAMFileHeader> fileHeaders = recordWithUniqueAttr.getValue();

            String newId;
            if(!idsThatAreAlreadyTaken.contains(recordId)) {
                //don't remap 1st record. If there are more records
                //with this id, they will be remapped in the 'else'.
                newId = recordId;
                idsThatAreAlreadyTaken.add(recordId);
            } else {
                //there is more than one record with this id.
                hasCollisions = true;

                //find a unique newId for this record by appending ".1", ".2", ... to its id
                int idx=1;
                while(idsThatAreAlreadyTaken.contains(newId = recordId + "." + Integer.toString(idx++)))
                    ;

                idsThatAreAlreadyTaken.add( newId );
            }

            //record the old-id -> new-id mapping for every file this record came from
            for(SAMFileHeader fileHeader : fileHeaders) {
                Map<String, String> readerTranslationTable = idTranslationTable.get(fileHeader);
                if(readerTranslationTable == null) {
                    readerTranslationTable = new HashMap<String, String>();
                    idTranslationTable.put(fileHeader, readerTranslationTable);
                }
                readerTranslationTable.put(recordId, newId);
            }

            result.add( headerRecordFactory.createRecord(newId, record) );
        }
    }

    return hasCollisions;
}
|
||||
|
||||
|
||||
/**
|
||||
* Get the sequences off the SAMFileHeader. Throws runtime exception if the sequence
|
||||
* are different from one another.
|
||||
*
|
||||
* @param headers headers to pull sequences from
|
||||
* @return sequences from files. Each file should have the same sequence
|
||||
*/
|
||||
private SAMSequenceDictionary getSequenceDictionary(final Collection<SAMFileHeader> headers) {
|
||||
SAMSequenceDictionary sequences = null;
|
||||
for (final SAMFileHeader header : headers) {
|
||||
|
||||
if (sequences == null) {
|
||||
sequences = header.getSequenceDictionary();
|
||||
}
|
||||
else {
|
||||
final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
|
||||
SequenceUtil.assertSequenceDictionariesEqual(sequences, currentSequences);
|
||||
}
|
||||
}
|
||||
|
||||
return sequences;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sequences from the SAMFileHeader, and merge the resulting sequence dictionaries.
|
||||
*
|
||||
* @param headers headers to pull sequences from
|
||||
* @return sequences from files. Each file should have the same sequence
|
||||
*/
|
||||
private SAMSequenceDictionary mergeSequenceDictionaries(final Collection<SAMFileHeader> headers) {
|
||||
SAMSequenceDictionary sequences = new SAMSequenceDictionary();
|
||||
for (final SAMFileHeader header : headers) {
|
||||
final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
|
||||
sequences = mergeSequences(sequences, currentSequences);
|
||||
}
|
||||
// second pass, make a map of the original seqeunce id -> new sequence id
|
||||
createSequenceMapping(headers, sequences);
|
||||
return sequences;
|
||||
}
|
||||
|
||||
/**
 * They've asked to merge the sequence headers. What we support right now is finding the sequence name superset,
 * while requiring the two inputs to agree on the relative order of their shared sequences.
 *
 * @param mergeIntoDict the result of merging so far. All SAMSequenceRecords in here have been cloned from the originals.
 * @param mergeFromDict A new sequence dictionary to merge into mergeIntoDict.
 * @return A new sequence dictionary resulting from merging the two inputs.
 */
private SAMSequenceDictionary mergeSequences(SAMSequenceDictionary mergeIntoDict, SAMSequenceDictionary mergeFromDict) {

    // a place to hold the sequences that we haven't found a home for, in the order they appear in mergeFromDict.
    LinkedList<SAMSequenceRecord> holder = new LinkedList<SAMSequenceRecord>();

    // Return value will be created from this.
    LinkedList<SAMSequenceRecord> resultingDict = new LinkedList<SAMSequenceRecord>();
    for (final SAMSequenceRecord sequenceRecord : mergeIntoDict.getSequences()) {
        resultingDict.add(sequenceRecord);
    }

    // Index into resultingDict of previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
    int prevloc = -1;
    // Previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
    SAMSequenceRecord previouslyMerged = null;

    for (SAMSequenceRecord sequenceRecord : mergeFromDict.getSequences()) {
        // Does it already exist in resultingDict?
        int loc = getIndexOfSequenceName(resultingDict, sequenceRecord.getSequenceName());
        if (loc == -1) {
            // If it doesn't already exist in resultingDict, save it and decide where to insert it later.
            holder.add(sequenceRecord.clone());
        } else if (prevloc > loc) {
            // If sequenceRecord already exists in resultingDict, but prior to the previous one
            // from mergeFromDict that already existed, the two inputs disagree on sequence order
            // and cannot be merged.
            throw new PicardException("Cannot merge sequence dictionaries because sequence " +
                    sequenceRecord.getSequenceName() + " and " + previouslyMerged.getSequenceName() +
                    " are in different orders in two input sequence dictionaries.");
        } else {
            // Since sequenceRecord already exists in resultingDict, don't need to add it.
            // Add in all the sequences prior to it that have been held in holder.
            resultingDict.addAll(loc, holder);
            // Remember the index of sequenceRecord so we can check for merge incompatibility.
            prevloc = loc + holder.size();
            previouslyMerged = sequenceRecord;
            holder.clear();
        }
    }
    // Append anything left in holder.
    if (holder.size() != 0) {
        resultingDict.addAll(holder);
    }
    return new SAMSequenceDictionary(resultingDict);
}
|
||||
|
||||
/**
|
||||
* Find sequence in list.
|
||||
* @param list List to search for the sequence name.
|
||||
* @param sequenceName Name to search for.
|
||||
* @return Index of SAMSequenceRecord with the given name in list, or -1 if not found.
|
||||
*/
|
||||
private static int getIndexOfSequenceName(final List<SAMSequenceRecord> list, final String sequenceName) {
|
||||
for (int i = 0; i < list.size(); ++i) {
|
||||
if (list.get(i).getSequenceName().equals(sequenceName)) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the sequence mapping. This map is used to convert the unmerged header sequence ID's to the merged
|
||||
* list of sequence id's.
|
||||
* @param headers the collections of headers.
|
||||
* @param masterDictionary the superset dictionary we've created.
|
||||
*/
|
||||
private void createSequenceMapping(final Collection<SAMFileHeader> headers, SAMSequenceDictionary masterDictionary) {
|
||||
LinkedList<String> resultingDictStr = new LinkedList<String>();
|
||||
for (SAMSequenceRecord r : masterDictionary.getSequences()) {
|
||||
resultingDictStr.add(r.getSequenceName());
|
||||
}
|
||||
for (final SAMFileHeader header : headers) {
|
||||
Map<Integer, Integer> seqMap = new HashMap<Integer, Integer>();
|
||||
SAMSequenceDictionary dict = header.getSequenceDictionary();
|
||||
for (SAMSequenceRecord rec : dict.getSequences()) {
|
||||
seqMap.put(rec.getSequenceIndex(), resultingDictStr.indexOf(rec.getSequenceName()));
|
||||
}
|
||||
this.samSeqDictionaryIdTranslationViaHeader.put(header, seqMap);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Returns the read group id that should be used for the input read and RG id.
|
||||
*
|
||||
* @deprecated replaced by getReadGroupId(SAMFileHeader, String)
|
||||
* */
|
||||
public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) {
|
||||
return getReadGroupId(reader.getFileHeader(), originalReadGroupId);
|
||||
}
|
||||
|
||||
/** Returns the read group id that should be used for the input read and RG id. */
|
||||
public String getReadGroupId(final SAMFileHeader header, final String originalReadGroupId) {
|
||||
return this.samReadGroupIdTranslation.get(header).get(originalReadGroupId);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param reader one of the input files
|
||||
* @param originalProgramGroupId a program group ID from the above input file
|
||||
* @return new ID from the merged list of program groups in the output file
|
||||
* @deprecated replaced by getProgramGroupId(SAMFileHeader, String)
|
||||
*/
|
||||
public String getProgramGroupId(final SAMFileReader reader, final String originalProgramGroupId) {
|
||||
return getProgramGroupId(reader.getFileHeader(), originalProgramGroupId);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param header one of the input headers
|
||||
* @param originalProgramGroupId a program group ID from the above input file
|
||||
* @return new ID from the merged list of program groups in the output file
|
||||
*/
|
||||
public String getProgramGroupId(final SAMFileHeader header, final String originalProgramGroupId) {
|
||||
return this.samProgramGroupIdTranslation.get(header).get(originalProgramGroupId);
|
||||
}
|
||||
|
||||
/** Returns true if there are read group duplicates within the merged headers. */
|
||||
public boolean hasReadGroupCollisions() {
|
||||
return this.hasReadGroupCollisions;
|
||||
}
|
||||
|
||||
/** Returns true if there are program group duplicates within the merged headers. */
|
||||
public boolean hasProgramGroupCollisions() {
|
||||
return hasProgramGroupCollisions;
|
||||
}
|
||||
|
||||
/** @return if we've merged the sequence dictionaries, return true */
|
||||
public boolean hasMergedSequenceDictionary() {
|
||||
return hasMergedSequenceDictionary;
|
||||
}
|
||||
|
||||
/** Returns the merged header that should be written to any output merged file. */
|
||||
public SAMFileHeader getMergedHeader() {
|
||||
return this.mergedHeader;
|
||||
}
|
||||
|
||||
/** Returns the collection of readers that this header merger is working with. May return null.
|
||||
* @deprecated replaced by getHeaders()
|
||||
*/
|
||||
public Collection<SAMFileReader> getReaders() {
|
||||
return this.readers;
|
||||
}
|
||||
|
||||
/** Returns the collection of readers that this header merger is working with.
|
||||
*/
|
||||
public Collection<SAMFileHeader> getHeaders() {
|
||||
return this.headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether this header merger contains a given SAM file header. Note that header presence
|
||||
* is confirmed / blocked by == equality, rather than actually testing SAMFileHeader.equals(), for
|
||||
* reasons of performance.
|
||||
* @param header header to check for.
|
||||
* @return True if the header exists in this HeaderMerger. False otherwise.
|
||||
*/
|
||||
boolean containsHeader(SAMFileHeader header) {
|
||||
for(SAMFileHeader headerMergerHeader: headers) {
|
||||
if(headerMergerHeader == header)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns the new mapping for a specified reader, given it's old sequence index
|
||||
* @param reader the reader
|
||||
* @param oldReferenceSequenceIndex the old sequence (also called reference) index
|
||||
* @return the new index value
|
||||
* @deprecated replaced by getMergedSequenceIndex(SAMFileHeader, Integer)
|
||||
*/
|
||||
public Integer getMergedSequenceIndex(SAMFileReader reader, Integer oldReferenceSequenceIndex) {
|
||||
return this.getMergedSequenceIndex(reader.getFileHeader(), oldReferenceSequenceIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Another mechanism for getting the new sequence index, for situations in which the reader is not available.
|
||||
* Note that if the SAMRecord has already had its header replaced with the merged header, this won't work.
|
||||
* @param header The original header for the input record in question.
|
||||
* @param oldReferenceSequenceIndex The original sequence index.
|
||||
* @return the new index value that is compatible with the merged sequence index.
|
||||
*/
|
||||
public Integer getMergedSequenceIndex(final SAMFileHeader header, Integer oldReferenceSequenceIndex) {
|
||||
final Map<Integer, Integer> mapping = this.samSeqDictionaryIdTranslationViaHeader.get(header);
|
||||
if (mapping == null) {
|
||||
throw new PicardException("No sequence dictionary mapping available for header: " + header);
|
||||
}
|
||||
|
||||
final Integer newIndex = mapping.get(oldReferenceSequenceIndex);
|
||||
if (newIndex == null) {
|
||||
throw new PicardException("No mapping for reference index " + oldReferenceSequenceIndex + " from header: " + header);
|
||||
}
|
||||
|
||||
return newIndex;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Implementations of this interface are used by mergeHeaderRecords(..) to instantiate
|
||||
* specific subclasses of AbstractSAMHeaderRecord.
|
||||
*/
|
||||
private static interface HeaderRecordFactory<RecordType extends AbstractSAMHeaderRecord> {
|
||||
|
||||
/**
|
||||
* Constructs a new instance of RecordType.
|
||||
* @param id The id of the new record.
|
||||
* @param srcRecord Except for the id, the new record will be a copy of this source record.
|
||||
*/
|
||||
public RecordType createRecord(final String id, RecordType srcRecord);
|
||||
}
|
||||
|
||||
/**
|
||||
* Struct that groups together a subclass of AbstractSAMHeaderRecord with the
|
||||
* SAMFileHeader that it came from.
|
||||
*/
|
||||
private static class HeaderRecordAndFileHeader<RecordType extends AbstractSAMHeaderRecord> {
|
||||
private RecordType headerRecord;
|
||||
private SAMFileHeader samFileHeader;
|
||||
|
||||
public HeaderRecordAndFileHeader(RecordType headerRecord, SAMFileHeader samFileHeader) {
|
||||
this.headerRecord = headerRecord;
|
||||
this.samFileHeader = samFileHeader;
|
||||
}
|
||||
|
||||
public RecordType getHeaderRecord() {
|
||||
return headerRecord;
|
||||
}
|
||||
public SAMFileHeader getFileHeader() {
|
||||
return samFileHeader;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -34,5 +34,6 @@ import java.lang.annotation.*;
|
|||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target({ElementType.FIELD})
|
||||
public @interface Gather {
|
||||
Class value();
|
||||
Class value() default Gather.class;
|
||||
boolean enabled() default true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -443,7 +443,7 @@ public class GenomeAnalysisEngine {
|
|||
if(!readsDataSource.hasIndex() && intervals != null && !argCollection.allowIntervalsWithUnindexedBAM)
|
||||
throw new UserException.CommandLineException("Cannot perform interval processing when reads are present but no index is available.");
|
||||
|
||||
if(walker instanceof LocusWalker) {
|
||||
if(walker instanceof LocusWalker || walker instanceof ActiveRegionWalker) {
|
||||
if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
|
||||
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
|
||||
if(intervals == null)
|
||||
|
|
|
|||
|
|
@ -256,7 +256,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
|||
// Naive algorithm: find all elements in current contig for proper schedule creation.
|
||||
List<GenomeLoc> lociInContig = new LinkedList<GenomeLoc>();
|
||||
for(GenomeLoc locus: loci) {
|
||||
if(dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded)
|
||||
if(!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded)
|
||||
lociInContig.add(locus);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,13 +31,13 @@ import net.sf.samtools.util.BlockCompressedFilePointerUtil;
|
|||
import net.sf.samtools.util.BlockCompressedInputStream;
|
||||
import net.sf.samtools.util.RuntimeEOFException;
|
||||
import net.sf.samtools.util.SeekableStream;
|
||||
import org.broad.tribble.util.BlockCompressedStreamConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
|
|
@ -120,6 +120,12 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
// TODO: Kill the region when all we want to do is start at the beginning of the stream and run to the end of the stream.
|
||||
this.position = new SAMReaderPosition(reader,this,new GATKBAMFileSpan(new GATKChunk(0,Long.MAX_VALUE)));
|
||||
|
||||
// The block offsets / block positions guarantee that the ending offset/position in the data structure maps to
|
||||
// the point in the file just following the last read. These two arrays should never be empty; initializing
|
||||
// to 0 to match the position above.
|
||||
this.blockOffsets.add(0);
|
||||
this.blockPositions.add(0L);
|
||||
|
||||
try {
|
||||
if(validate) {
|
||||
System.out.printf("BlockInputStream %s: BGZF block validation mode activated%n",this);
|
||||
|
|
@ -143,34 +149,52 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
public long getFilePointer() {
|
||||
long filePointer;
|
||||
synchronized(lock) {
|
||||
if(buffer.remaining() > 0) {
|
||||
// If there's data in the buffer, figure out from whence it came.
|
||||
final long blockAddress = blockPositions.size() > 0 ? blockPositions.get(0) : 0;
|
||||
final int blockOffset = buffer.position();
|
||||
filePointer = blockAddress << 16 | blockOffset;
|
||||
}
|
||||
else {
|
||||
// Otherwise, find the next position to load.
|
||||
filePointer = position.getBlockAddress() << 16;
|
||||
}
|
||||
// Find the current block within the input stream.
|
||||
int blockIndex;
|
||||
for(blockIndex = 0; blockIndex+1 < blockOffsets.size() && buffer.position() >= blockOffsets.get(blockIndex + 1); blockIndex++)
|
||||
;
|
||||
filePointer = blockPositions.get(blockIndex) + (buffer.position()-blockOffsets.get(blockIndex));
|
||||
}
|
||||
|
||||
if(validatingInputStream != null && filePointer != validatingInputStream.getFilePointer())
|
||||
throw new ReviewedStingException(String.format("Position of input stream is invalid; expected (block address, block offset) = (%d,%d), got (%d,%d)",
|
||||
BlockCompressedFilePointerUtil.getBlockAddress(filePointer),BlockCompressedFilePointerUtil.getBlockOffset(filePointer),
|
||||
BlockCompressedFilePointerUtil.getBlockAddress(validatingInputStream.getFilePointer()),BlockCompressedFilePointerUtil.getBlockOffset(validatingInputStream.getFilePointer())));
|
||||
// if(validatingInputStream != null && filePointer != validatingInputStream.getFilePointer())
|
||||
// throw new ReviewedStingException(String.format("Position of input stream is invalid; expected (block address, block offset) = (%d,%d), got (%d,%d)",
|
||||
// BlockCompressedFilePointerUtil.getBlockAddress(validatingInputStream.getFilePointer()),BlockCompressedFilePointerUtil.getBlockOffset(validatingInputStream.getFilePointer()),
|
||||
// BlockCompressedFilePointerUtil.getBlockAddress(filePointer),BlockCompressedFilePointerUtil.getBlockOffset(filePointer)));
|
||||
|
||||
return filePointer;
|
||||
}
|
||||
|
||||
public void seek(long target) {
|
||||
// TODO: Validate the seek point.
|
||||
//System.out.printf("Thread %s, BlockInputStream %s: seeking to block %d, offset %d%n",Thread.currentThread().getId(),this,BlockCompressedFilePointerUtil.getBlockAddress(target),BlockCompressedFilePointerUtil.getBlockOffset(target));
|
||||
synchronized(lock) {
|
||||
clearBuffers();
|
||||
position.advancePosition(BlockCompressedFilePointerUtil.getBlockAddress(target));
|
||||
waitForBufferFill();
|
||||
buffer.position(BlockCompressedFilePointerUtil.getBlockOffset(target));
|
||||
|
||||
// Ensure that the position filled in by submitAccessPlan() is in sync with the seek target just specified.
|
||||
position.advancePosition(target);
|
||||
|
||||
// If the position advances past the end of the target, that must mean that we seeked to a point at the end
|
||||
// of one of the chunk list's subregions. Make a note of our current position and punt on loading any data.
|
||||
if(target < position.getBlockAddress() << 16) {
|
||||
blockOffsets.clear();
|
||||
blockOffsets.add(0);
|
||||
blockPositions.clear();
|
||||
blockPositions.add(target);
|
||||
}
|
||||
else {
|
||||
waitForBufferFill();
|
||||
// A buffer fill will load the relevant data from the shard, but the buffer position still needs to be
|
||||
// advanced as appropriate.
|
||||
Iterator<Integer> blockOffsetIterator = blockOffsets.descendingIterator();
|
||||
Iterator<Long> blockPositionIterator = blockPositions.descendingIterator();
|
||||
while(blockOffsetIterator.hasNext() && blockPositionIterator.hasNext()) {
|
||||
final int blockOffset = blockOffsetIterator.next();
|
||||
final long blockPosition = blockPositionIterator.next();
|
||||
if((blockPosition >> 16) == (target >> 16) && (blockPosition&0xFFFF) < (target&0xFFFF)) {
|
||||
buffer.position(blockOffset + (int)(target&0xFFFF)-(int)(blockPosition&0xFFFF));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(validatingInputStream != null) {
|
||||
try {
|
||||
|
|
@ -191,14 +215,17 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
buffer.clear();
|
||||
buffer.limit(0);
|
||||
|
||||
// Clear everything except the last block offset / position
|
||||
blockOffsets.clear();
|
||||
blockPositions.clear();
|
||||
blockOffsets.add(0);
|
||||
while(blockPositions.size() > 1)
|
||||
blockPositions.removeFirst();
|
||||
}
|
||||
|
||||
public boolean eof() {
|
||||
synchronized(lock) {
|
||||
// TODO: Handle multiple empty BGZF blocks at end of the file.
|
||||
return position != null && position.getBlockAddress() >= length;
|
||||
return position != null && (position.getBlockAddress() < 0 || position.getBlockAddress() >= length);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -216,7 +243,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
// Assume that the access plan is going to tell us to start where we are and move forward.
|
||||
// If this isn't the case, we'll soon receive a seek request and the buffer will be forced to reset.
|
||||
if(this.position != null && position.getBlockAddress() < this.position.getBlockAddress())
|
||||
position.advancePosition(this.position.getBlockAddress());
|
||||
position.advancePosition(this.position.getBlockAddress() << 16);
|
||||
}
|
||||
this.position = position;
|
||||
}
|
||||
|
|
@ -225,14 +252,15 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
// Compact buffer to maximize storage space.
|
||||
int bytesToRemove = 0;
|
||||
|
||||
// Look ahead to see if we can compact away the first block in the series.
|
||||
while(blockOffsets.size() > 1 && buffer.position() < blockOffsets.get(1)) {
|
||||
bytesToRemove += blockOffsets.remove();
|
||||
// Look ahead to see if we can compact away the first blocks in the series.
|
||||
while(blockOffsets.size() > 1 && buffer.position() >= blockOffsets.get(1)) {
|
||||
blockOffsets.remove();
|
||||
blockPositions.remove();
|
||||
bytesToRemove = blockOffsets.peek();
|
||||
}
|
||||
|
||||
// If we end up with an empty block at the end of the series, compact this as well.
|
||||
if(buffer.remaining() == 0 && !blockOffsets.isEmpty() && buffer.position() >= blockOffsets.peek()) {
|
||||
if(buffer.remaining() == 0 && blockOffsets.size() > 1 && buffer.position() >= blockOffsets.peek()) {
|
||||
bytesToRemove += buffer.position();
|
||||
blockOffsets.remove();
|
||||
blockPositions.remove();
|
||||
|
|
@ -241,11 +269,17 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
int finalBufferStart = buffer.position() - bytesToRemove;
|
||||
int finalBufferSize = buffer.remaining();
|
||||
|
||||
// Position the buffer to remove the unneeded data, and compact it away.
|
||||
buffer.position(bytesToRemove);
|
||||
buffer.compact();
|
||||
|
||||
// Reset the limits for reading.
|
||||
buffer.position(finalBufferStart);
|
||||
buffer.limit(finalBufferStart+finalBufferSize);
|
||||
|
||||
// Shift everything in the offset buffer down to accommodate the bytes removed from the buffer.
|
||||
for(int i = 0; i < blockOffsets.size(); i++)
|
||||
blockOffsets.set(i,blockOffsets.get(i)-bytesToRemove);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -253,6 +287,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
* MUST be called from a thread that is NOT the reader thread.
|
||||
* @param incomingBuffer The data being pushed into this input stream.
|
||||
* @param position target position for the data.
|
||||
* @param filePosition the current position of the file pointer
|
||||
*/
|
||||
public void copyIntoBuffer(final ByteBuffer incomingBuffer, final SAMReaderPosition position, final long filePosition) {
|
||||
synchronized(lock) {
|
||||
|
|
@ -262,7 +297,12 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
buffer.limit(buffer.capacity());
|
||||
|
||||
// Advance the position to take the most recent read into account.
|
||||
long lastReadPosition = position.getBlockAddress();
|
||||
final long lastBlockAddress = position.getBlockAddress();
|
||||
final int blockOffsetStart = position.getFirstOffsetInBlock();
|
||||
final int blockOffsetEnd = position.getLastOffsetInBlock();
|
||||
|
||||
// Where did this read end? It either ended in the middle of a block (for a bounding chunk) or it ended at the start of the next block.
|
||||
final long endOfRead = (blockOffsetEnd < incomingBuffer.remaining()) ? (lastBlockAddress << 16) | blockOffsetEnd : filePosition << 16;
|
||||
|
||||
byte[] validBytes = null;
|
||||
if(validatingInputStream != null) {
|
||||
|
|
@ -277,7 +317,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
incomingBuffer.position(pos);
|
||||
|
||||
long currentFilePointer = validatingInputStream.getFilePointer();
|
||||
validatingInputStream.seek(lastReadPosition << 16);
|
||||
validatingInputStream.seek(lastBlockAddress << 16);
|
||||
validatingInputStream.read(validBytes);
|
||||
validatingInputStream.seek(currentFilePointer);
|
||||
|
||||
|
|
@ -286,7 +326,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
}
|
||||
|
||||
this.position = position;
|
||||
position.advancePosition(filePosition);
|
||||
position.advancePosition(filePosition << 16);
|
||||
|
||||
if(buffer.remaining() < incomingBuffer.remaining()) {
|
||||
//System.out.printf("Thread %s: waiting for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n",Thread.currentThread().getId(),buffer.remaining(),incomingBuffer.remaining());
|
||||
|
|
@ -294,12 +334,21 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
//System.out.printf("Thread %s: waited for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n", Thread.currentThread().getId(), buffer.remaining(), incomingBuffer.remaining());
|
||||
}
|
||||
|
||||
// Queue list of block offsets / block positions.
|
||||
// Remove the last position in the list and add in the last read position, in case the two are different.
|
||||
blockOffsets.removeLast();
|
||||
blockOffsets.add(buffer.position());
|
||||
blockPositions.add(lastReadPosition);
|
||||
blockPositions.removeLast();
|
||||
blockPositions.add(lastBlockAddress << 16 | blockOffsetStart);
|
||||
|
||||
// Stream the buffer into the data stream.
|
||||
incomingBuffer.position(blockOffsetStart);
|
||||
incomingBuffer.limit(Math.min(incomingBuffer.limit(),blockOffsetEnd));
|
||||
buffer.put(incomingBuffer);
|
||||
|
||||
// Then, add the last position read to the very end of the list, just past the end of the last buffer.
|
||||
blockOffsets.add(buffer.position());
|
||||
blockPositions.add(endOfRead);
|
||||
|
||||
// Set up the buffer for reading.
|
||||
buffer.flip();
|
||||
bufferFilled = true;
|
||||
|
|
@ -380,23 +429,21 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
lock.notify();
|
||||
}
|
||||
|
||||
if(validatingInputStream != null) {
|
||||
byte[] validBytes = new byte[length];
|
||||
try {
|
||||
validatingInputStream.read(validBytes,offset,length);
|
||||
for(int i = offset; i < offset+length; i++) {
|
||||
if(bytes[i] != validBytes[i]) {
|
||||
System.out.printf("Thread %s: preparing to throw an exception because contents don't match%n",Thread.currentThread().getId());
|
||||
throw new ReviewedStingException(String.format("Thread %s: blockInputStream %s attempting to return wrong set of bytes; mismatch at offset %d",Thread.currentThread().getId(),this,i));
|
||||
}
|
||||
}
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
|
||||
}
|
||||
}
|
||||
// if(validatingInputStream != null) {
|
||||
// byte[] validBytes = new byte[length];
|
||||
// try {
|
||||
// validatingInputStream.read(validBytes,offset,length);
|
||||
// for(int i = offset; i < offset+length; i++) {
|
||||
// if(bytes[i] != validBytes[i])
|
||||
// throw new ReviewedStingException(String.format("Thread %s: blockInputStream %s attempting to return wrong set of bytes; mismatch at offset %d",Thread.currentThread().getId(),this,i));
|
||||
// }
|
||||
// }
|
||||
// catch(IOException ex) {
|
||||
// throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
|
||||
// }
|
||||
// }
|
||||
|
||||
return length - remaining;
|
||||
return eof() ? -1 : length-remaining;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
|
|
@ -424,7 +471,6 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
|
|||
lock.wait();
|
||||
}
|
||||
catch(InterruptedException ex) {
|
||||
// TODO: handle me.
|
||||
throw new ReviewedStingException("Interrupt occurred waiting for buffer to fill",ex);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ public class GATKBAMIndex {
|
|||
seek(0);
|
||||
final byte[] buffer = readBytes(4);
|
||||
if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) {
|
||||
throw new RuntimeException("Invalid file header in BAM index " + mFile +
|
||||
throw new ReviewedStingException("Invalid file header in BAM index " + mFile +
|
||||
": " + new String(buffer));
|
||||
}
|
||||
|
||||
|
|
@ -112,7 +112,7 @@ public class GATKBAMIndex {
|
|||
openIndexFile();
|
||||
|
||||
if (referenceSequence >= sequenceCount)
|
||||
throw new ReviewedStingException("Invalid sequence number " + referenceSequence);
|
||||
throw new ReviewedStingException("Invalid sequence number " + referenceSequence + " in index file " + mFile);
|
||||
|
||||
skipToSequence(referenceSequence);
|
||||
|
||||
|
|
@ -183,12 +183,12 @@ public class GATKBAMIndex {
|
|||
public int getLevelForBin(final Bin bin) {
|
||||
GATKBin gatkBin = new GATKBin(bin);
|
||||
if(gatkBin.getBinNumber() >= MAX_BINS)
|
||||
throw new SAMException("Tried to get level for invalid bin.");
|
||||
throw new ReviewedStingException("Tried to get level for invalid bin in index file " + mFile);
|
||||
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
|
||||
if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
|
||||
return i;
|
||||
}
|
||||
throw new SAMException("Unable to find correct bin for bin "+bin);
|
||||
throw new ReviewedStingException("Unable to find correct bin for bin " + bin + " in index file " + mFile);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -352,7 +352,7 @@ public class GATKBAMIndex {
|
|||
fileChannel.read(buffer);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to read bytes from index file.");
|
||||
throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -379,7 +379,7 @@ public class GATKBAMIndex {
|
|||
fileChannel.position(fileChannel.position() + count);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to reposition file channel.");
|
||||
throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -388,7 +388,7 @@ public class GATKBAMIndex {
|
|||
fileChannel.position(position);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Index: unable to reposition of file channel.");
|
||||
throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -556,7 +556,7 @@ public class SAMDataSource {
|
|||
*/
|
||||
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
|
||||
// Set up merging to dynamically merge together multiple BAMs.
|
||||
MergingSamRecordIterator mergingIterator = readers.createMergingIterator();
|
||||
Map<SAMFileReader,CloseableIterator<SAMRecord>> iteratorMap = new HashMap<SAMFileReader,CloseableIterator<SAMRecord>>();
|
||||
|
||||
for(SAMReaderID id: getReaderIDs()) {
|
||||
CloseableIterator<SAMRecord> iterator = null;
|
||||
|
|
@ -573,9 +573,13 @@ public class SAMDataSource {
|
|||
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
|
||||
if(shard.getGenomeLocs().size() > 0)
|
||||
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
|
||||
mergingIterator.addIterator(readers.getReader(id),iterator);
|
||||
iteratorMap.put(readers.getReader(id), iterator);
|
||||
}
|
||||
|
||||
MergingSamRecordIterator mergingIterator = readers.createMergingIterator(iteratorMap);
|
||||
|
||||
|
||||
|
||||
return applyDecoratingIterators(shard.getReadMetrics(),
|
||||
enableVerification,
|
||||
readProperties.useOriginalBaseQualities(),
|
||||
|
|
@ -847,8 +851,13 @@ public class SAMDataSource {
|
|||
return headerMerger.getReadGroupId(header,originalReadGroupID);
|
||||
}
|
||||
|
||||
public MergingSamRecordIterator createMergingIterator() {
|
||||
return new MergingSamRecordIterator(headerMerger,readers.values(),true);
|
||||
/**
|
||||
* Creates a new merging iterator from the given map, with the given header.
|
||||
* @param iteratorMap A map of readers to iterators.
|
||||
* @return An iterator which will merge those individual iterators.
|
||||
*/
|
||||
public MergingSamRecordIterator createMergingIterator(final Map<SAMFileReader,CloseableIterator<SAMRecord>> iteratorMap) {
|
||||
return new MergingSamRecordIterator(headerMerger,iteratorMap,true);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -75,6 +75,22 @@ class SAMReaderPosition {
|
|||
return nextBlockAddress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the first offset of interest in the block returned by getBlockAddress().
|
||||
* @return First block of interest in this segment.
|
||||
*/
|
||||
public int getFirstOffsetInBlock() {
|
||||
return (nextBlockAddress == positionIterator.peek().getBlockStart()) ? positionIterator.peek().getBlockOffsetStart() : 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the last offset of interest in the block returned by getBlockAddress().
|
||||
* @return First block of interest in this segment.
|
||||
*/
|
||||
public int getLastOffsetInBlock() {
|
||||
return (nextBlockAddress == positionIterator.peek().getBlockEnd()) ? positionIterator.peek().getBlockOffsetEnd() : 65536;
|
||||
}
|
||||
|
||||
public void reset() {
|
||||
initialize();
|
||||
}
|
||||
|
|
@ -95,26 +111,27 @@ class SAMReaderPosition {
|
|||
* @param filePosition The current position within the file.
|
||||
*/
|
||||
void advancePosition(final long filePosition) {
|
||||
nextBlockAddress = filePosition;
|
||||
nextBlockAddress = filePosition >> 16;
|
||||
|
||||
// Check the current file position against the iterator; if the iterator is before the current file position,
|
||||
// draw the iterator forward. Remember when performing the check that coordinates are half-open!
|
||||
try {
|
||||
while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek())) {
|
||||
positionIterator.next();
|
||||
// Check to see if the iterator has more data available.
|
||||
if(positionIterator.hasNext() && filePosition < positionIterator.peek().getBlockStart()) {
|
||||
nextBlockAddress = positionIterator.peek().getBlockStart();
|
||||
break;
|
||||
}
|
||||
while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek())) {
|
||||
positionIterator.next();
|
||||
|
||||
// If the block iterator has shot past the file pointer, bring the file pointer flush with the start of the current block.
|
||||
if(positionIterator.hasNext() && filePosition < positionIterator.peek().getChunkStart()) {
|
||||
nextBlockAddress = positionIterator.peek().getBlockStart();
|
||||
//System.out.printf("SAMReaderPosition: next block address advanced to %d%n",nextBlockAddress);
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch(Exception ex) {
|
||||
throw new ReviewedStingException("");
|
||||
}
|
||||
|
||||
// If we've shot off the end of the block pointer, notify consumers that iteration is complete.
|
||||
if(!positionIterator.hasNext())
|
||||
nextBlockAddress = -1;
|
||||
}
|
||||
|
||||
private boolean isFilePositionPastEndOfChunk(final long filePosition, final GATKChunk chunk) {
|
||||
return (filePosition > chunk.getBlockEnd() || (filePosition == chunk.getBlockEnd() && chunk.getBlockOffsetEnd() == 0));
|
||||
return filePosition >= chunk.getChunkEnd();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
|
||||
|
||||
import java.util.Collection;
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
|||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
|
@ -55,7 +56,6 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
|
||||
traversalEngine.startTimersIfNecessary();
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
LocusWalker lWalker = (LocusWalker)walker;
|
||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
|
||||
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
|
|
@ -77,6 +77,12 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
done = walker.isDone();
|
||||
}
|
||||
|
||||
// Special function call to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
if( traversalEngine instanceof TraverseActiveRegions ) {
|
||||
final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
|
||||
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
|
||||
}
|
||||
|
||||
Object result = accumulator.finishTraversal();
|
||||
|
||||
printOnTraversalDone(result);
|
||||
|
|
|
|||
|
|
@ -128,6 +128,8 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
traversalEngine = new TraverseDuplicates();
|
||||
} else if (walker instanceof ReadPairWalker) {
|
||||
traversalEngine = new TraverseReadPairs();
|
||||
} else if (walker instanceof ActiveRegionWalker) {
|
||||
traversalEngine = new TraverseActiveRegions();
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,8 +27,8 @@ import java.util.concurrent.Future;
|
|||
public class TreeReducer implements Callable {
|
||||
private HierarchicalMicroScheduler microScheduler;
|
||||
private TreeReducible walker;
|
||||
private final Future lhs;
|
||||
private final Future rhs;
|
||||
private Future lhs;
|
||||
private Future rhs;
|
||||
|
||||
/**
|
||||
* Create a one-sided reduce. Result will be a simple pass-through of the result.
|
||||
|
|
@ -99,6 +99,10 @@ public class TreeReducer implements Callable {
|
|||
|
||||
long endTime = System.currentTimeMillis();
|
||||
|
||||
// Constituent bits of this tree reduces are no longer required. Throw them away.
|
||||
this.lhs = null;
|
||||
this.rhs = null;
|
||||
|
||||
microScheduler.reportTreeReduceTime( endTime - startTime );
|
||||
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,213 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Queue;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 12/9/11
|
||||
*/
|
||||
|
||||
public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,LocusShardDataProvider> {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
private final Queue<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedHashSet<SAMRecord> myReads = new LinkedHashSet<SAMRecord>();
|
||||
|
||||
@Override
|
||||
protected String getTraversalType() {
|
||||
return "active regions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
|
||||
|
||||
LocusView locusView = getLocusView( walker, dataProvider );
|
||||
|
||||
int minStart = Integer.MAX_VALUE;
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
|
||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||
|
||||
final ArrayList<ActiveRegion> isActiveList = new ArrayList<ActiveRegion>();
|
||||
|
||||
//ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = null;
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
referenceOrderedDataView = (RodLocusView)locusView;
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
GenomeLoc location = locus.getLocation();
|
||||
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
if ( locus.hasExtendedEventPileup() ) {
|
||||
// if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
|
||||
// associated with the current site), we need to update the location. The updated location still starts
|
||||
// at the current genomic position, but it has to span the length of the longest deletion (if any).
|
||||
location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
|
||||
|
||||
// it is possible that the new expanded location spans the current shard boundary; the next method ensures
|
||||
// that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
|
||||
// the view has all the bases we are gonna need. If the location fits within the current view bounds,
|
||||
// the next call will not do anything to the view:
|
||||
referenceView.expandBoundsToAccomodateLoc(location);
|
||||
}
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
final boolean isActive = walker.isActive( tracker, refContext, locus );
|
||||
isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser()) );
|
||||
|
||||
// Grab all the previously unseen reads from this pileup and add them to the massive read list
|
||||
for( final PileupElement p : locus.getBasePileup() ) {
|
||||
final SAMRecord read = p.getRead();
|
||||
if( !myReads.contains(read) ) {
|
||||
myReads.add(read);
|
||||
}
|
||||
}
|
||||
|
||||
// If this is the last pileup for this shard then need to calculate the minimum alignment start so that
|
||||
// we know which active regions in the work queue are now safe to process
|
||||
if( !locusView.hasNext() ) {
|
||||
for( final PileupElement p : locus.getBasePileup() ) {
|
||||
final SAMRecord read = p.getRead();
|
||||
if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
|
||||
}
|
||||
}
|
||||
printProgress(dataProvider.getShard(),locus.getLocation());
|
||||
}
|
||||
|
||||
// Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
// add these blocks of work to the work queue
|
||||
final ArrayList<ActiveRegion> activeRegions = integrateActiveList( isActiveList );
|
||||
logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
workQueue.addAll( activeRegions );
|
||||
}
|
||||
|
||||
while( workQueue.peek().getLocation().getStop() < minStart ) {
|
||||
final ActiveRegion activeRegion = workQueue.remove();
|
||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
// Special function called in LinearMicroScheduler to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
public T endTraversal( final Walker<M,T> walker, T sum) {
|
||||
while( workQueue.peek() != null ) {
|
||||
final ActiveRegion activeRegion = workQueue.remove();
|
||||
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, (ActiveRegionWalker<M,T>) walker );
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<SAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
final ArrayList<SAMRecord> placedReads = new ArrayList<SAMRecord>();
|
||||
for( final SAMRecord read : reads ) {
|
||||
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
|
||||
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
|
||||
// The region which the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as right most region)
|
||||
long maxOverlap = activeRegion.getLocation().sizeOfOverlap( readLoc );
|
||||
ActiveRegion bestRegion = activeRegion;
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
|
||||
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap(readLoc);
|
||||
bestRegion = otherRegionToTest;
|
||||
}
|
||||
}
|
||||
bestRegion.add( (GATKSAMRecord) read, true );
|
||||
|
||||
// The read is also added to all other region in which it overlaps but marked as non-primary
|
||||
if( !bestRegion.equals(activeRegion) ) {
|
||||
activeRegion.add( (GATKSAMRecord) read, false );
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
|
||||
activeRegion.add( (GATKSAMRecord) read, false );
|
||||
}
|
||||
}
|
||||
placedReads.add( read );
|
||||
}
|
||||
}
|
||||
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
|
||||
|
||||
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLocation());
|
||||
final M x = walker.map( activeRegion, null ); // BUGBUG: tracker needs to be filled in and passed to the walker
|
||||
return walker.reduce( x, sum );
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the best view of loci for this walker given the available data.
|
||||
* @param walker walker to interrogate.
|
||||
* @param dataProvider Data which which to drive the locus view.
|
||||
* @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
|
||||
*/
|
||||
private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
|
||||
DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
|
||||
if( dataSource == DataSource.READS )
|
||||
return new CoveredLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
|
||||
return new AllLocusView(dataProvider);
|
||||
else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new RodLocusView(dataProvider);
|
||||
else
|
||||
throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
|
||||
}
|
||||
|
||||
// integrate active regions into contiguous chunks based on active status
|
||||
private ArrayList<ActiveRegion> integrateActiveList( final ArrayList<ActiveRegion> activeList ) {
|
||||
final ArrayList<ActiveRegion> returnList = new ArrayList<ActiveRegion>();
|
||||
ActiveRegion prevLocus = activeList.remove(0);
|
||||
ActiveRegion startLocus = prevLocus;
|
||||
for( final ActiveRegion thisLocus : activeList ) {
|
||||
if( prevLocus.isActive != thisLocus.isActive ) {
|
||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
|
||||
prevLocus.isActive, engine.getGenomeLocParser() ) );
|
||||
startLocus = thisLocus;
|
||||
}
|
||||
prevLocus = thisLocus;
|
||||
}
|
||||
// output the last region if necessary
|
||||
if( startLocus != prevLocus ) {
|
||||
returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
|
||||
prevLocus.isActive, engine.getGenomeLocParser() ) );
|
||||
}
|
||||
return returnList;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,29 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 12/7/11
|
||||
*/
|
||||
|
||||
@By(DataSource.READS)
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
|
||||
@PartitionBy(PartitionType.READ)
|
||||
public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
// Determine active status over the AlignmentContext
|
||||
public abstract boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
|
||||
|
||||
// Map over the ActiveRegion
|
||||
public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
|
||||
}
|
||||
|
|
@ -30,18 +30,19 @@ import net.sf.samtools.SAMReadGroupRecord;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Collection;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
|
||||
*
|
||||
|
|
@ -70,12 +71,21 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
|||
* -I input2.bam \
|
||||
* --read_filter MappingQualityZero
|
||||
*
|
||||
* // Prints the first 2000 reads in the BAM file
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T PrintReads \
|
||||
* -o output.bam \
|
||||
* -I input.bam \
|
||||
* -n 2000
|
||||
*
|
||||
* // Downsamples BAM file to 25%
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T PrintReads \
|
||||
* -o output.bam \
|
||||
* -I input.bam \
|
||||
* -ds 0.25
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
|
|
@ -95,9 +105,18 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
|
||||
String platform = null;
|
||||
|
||||
/**
|
||||
* Only prints the first n reads of the file
|
||||
*/
|
||||
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
|
||||
int nReadsToPrint = -1;
|
||||
|
||||
/**
|
||||
* Downsamples the bam file by the given ratio, printing only approximately the given percentage of reads. The downsampling is balanced (over the entire coverage)
|
||||
*/
|
||||
@Argument(fullName = "downsample_coverage", shortName = "ds", doc="Downsample BAM to desired coverage", required = false)
|
||||
public double downsampleRatio = 1.0;
|
||||
|
||||
/**
|
||||
* Only reads from samples listed in the provided file(s) will be included in the output.
|
||||
*/
|
||||
|
|
@ -112,6 +131,8 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
|
||||
private TreeSet<String> samplesToChoose = new TreeSet<String>();
|
||||
private boolean SAMPLES_SPECIFIED = false;
|
||||
|
||||
Random random;
|
||||
|
||||
/**
|
||||
* The initialize function.
|
||||
|
|
@ -132,13 +153,15 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
if(!samplesToChoose.isEmpty()) {
|
||||
SAMPLES_SPECIFIED = true;
|
||||
}
|
||||
|
||||
random = GenomeAnalysisEngine.getRandomGenerator();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* The reads filter function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return true if the read passes the filter, false if it doesn't
|
||||
*/
|
||||
|
|
@ -177,13 +200,14 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
nReadsToPrint--; // n > 0 means there are still reads to be printed.
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
// if downsample option is turned off (= 1) then don't waste time getting the next random number.
|
||||
return (downsampleRatio == 1 || random.nextDouble() < downsampleRatio);
|
||||
}
|
||||
|
||||
/**
|
||||
* The reads map function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return the read itself
|
||||
*/
|
||||
|
|
@ -194,6 +218,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
/**
|
||||
* reduceInit is called once before any calls to the map function. We use it here to setup the output
|
||||
* bam file, if it was specified on the command line
|
||||
*
|
||||
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
|
||||
*/
|
||||
public SAMFileWriter reduceInit() {
|
||||
|
|
@ -202,7 +227,8 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
|
||||
/**
|
||||
* given a read and a output location, reduce by emitting the read
|
||||
* @param read the read itself
|
||||
*
|
||||
* @param read the read itself
|
||||
* @param output the output source
|
||||
* @return the SAMFileWriter, so that the next reduce can emit to the same source
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ import java.util.*;
|
|||
|
||||
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||
|
||||
private final static boolean DEBUG = false;
|
||||
// private final static boolean DEBUG = false;
|
||||
|
||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||
|
||||
|
|
@ -70,47 +70,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
return genotypeLikelihoods;
|
||||
}
|
||||
|
||||
|
||||
final static double approximateLog10SumLog10(double[] vals) {
|
||||
if ( vals.length < 2 )
|
||||
throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10");
|
||||
|
||||
double approx = approximateLog10SumLog10(vals[0], vals[1]);
|
||||
for ( int i = 2; i < vals.length; i++ )
|
||||
approx = approximateLog10SumLog10(approx, vals[i]);
|
||||
return approx;
|
||||
}
|
||||
|
||||
final static double approximateLog10SumLog10(double small, double big) {
|
||||
// make sure small is really the smaller value
|
||||
if ( small > big ) {
|
||||
final double t = big;
|
||||
big = small;
|
||||
small = t;
|
||||
}
|
||||
|
||||
if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
|
||||
return big;
|
||||
|
||||
if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
|
||||
return big;
|
||||
|
||||
// OK, so |y-x| < tol: we use the following identity then:
|
||||
// we need to compute log10(10^x + 10^y)
|
||||
// By Jacobian logarithm identity, this is equal to
|
||||
// max(x,y) + log10(1+10^-abs(x-y))
|
||||
// we compute the second term as a table lookup
|
||||
// with integer quantization
|
||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||
//final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||
int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
|
||||
|
||||
//double z =Math.log10(1+Math.pow(10.0,-diff));
|
||||
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
|
||||
return big + MathUtils.jacobianLogTable[ind];
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Multi-allelic implementation.
|
||||
|
|
@ -218,12 +177,18 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
ACqueue.add(zeroSet);
|
||||
indexesToACset.put(zeroSet.ACcounts, zeroSet);
|
||||
|
||||
// optimization: create the temporary storage for computing L(j,k) just once
|
||||
final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1;
|
||||
final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies];
|
||||
for ( int i = 0; i < maxPossibleDependencies; i++ )
|
||||
tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY;
|
||||
|
||||
// keep processing while we have AC conformations that need to be calculated
|
||||
double maxLog10L = Double.NEGATIVE_INFINITY;
|
||||
while ( !ACqueue.isEmpty() ) {
|
||||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
||||
|
|
@ -238,20 +203,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
final Queue<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
final AlleleFrequencyCalculationResult result,
|
||||
final double[][] tempLog10ConformationLikelihoods) {
|
||||
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
|
||||
|
||||
// compute the log10Likelihoods
|
||||
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result);
|
||||
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods);
|
||||
|
||||
// clean up memory
|
||||
if ( !preserveData ) {
|
||||
for ( ExactACcounts index : set.dependentACsetsToDelete ) {
|
||||
indexesToACset.put(index, null);
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -259,8 +225,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
// can we abort early because the log10Likelihoods are so small?
|
||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
|
||||
|
||||
// no reason to keep this data around because nothing depends on it
|
||||
if ( !preserveData )
|
||||
|
|
@ -303,8 +269,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
// if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate
|
||||
// over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early)
|
||||
if ( !preserveData && lastSet == null ) {
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
|
||||
lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
|
||||
}
|
||||
if ( lastSet != null )
|
||||
|
|
@ -350,7 +316,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final AlleleFrequencyCalculationResult result) {
|
||||
final AlleleFrequencyCalculationResult result,
|
||||
final double[][] tempLog10ConformationLikelihoods) {
|
||||
|
||||
set.log10Likelihoods[0] = 0.0; // the zero case
|
||||
final int totalK = set.getACsum();
|
||||
|
|
@ -362,38 +329,41 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
// k > 0 for at least one k
|
||||
else {
|
||||
// all possible likelihoods for a given cell from which to choose the max
|
||||
final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
|
||||
final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it
|
||||
// deal with the non-AA possible conformations
|
||||
int conformationIndex = 1;
|
||||
for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
|
||||
//if ( DEBUG )
|
||||
// System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
|
||||
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||
ExactACset dependent = indexesToACset.get(mapping.getKey());
|
||||
|
||||
// initialize
|
||||
for ( int i = 0; i < numPaths; i++ )
|
||||
// TODO -- Arrays.fill?
|
||||
// todo -- is this even necessary? Why not have as else below?
|
||||
log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY;
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
||||
|
||||
// deal with the AA case first
|
||||
if ( totalK < 2*j-1 )
|
||||
log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
|
||||
|
||||
// deal with the other possible conformations now
|
||||
if ( totalK <= 2*j ) { // skip impossible conformations
|
||||
int conformationIndex = 1;
|
||||
for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
|
||||
log10ConformationLikelihoods[conformationIndex++] =
|
||||
determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()];
|
||||
}
|
||||
if ( totalK <= 2*j ) { // skip impossible conformations
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
tempLog10ConformationLikelihoods[j][conformationIndex] =
|
||||
determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()];
|
||||
} else {
|
||||
tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY;
|
||||
}
|
||||
}
|
||||
|
||||
final double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods);
|
||||
conformationIndex++;
|
||||
}
|
||||
|
||||
// finally, update the L(j,k) value
|
||||
// finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
|
||||
final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
|
||||
for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
|
||||
|
||||
if ( totalK < 2*j-1 ) {
|
||||
final double[] gl = genotypeLikelihoods.get(j);
|
||||
tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
|
||||
} else {
|
||||
tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY;
|
||||
}
|
||||
|
||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||
final double log10Max = MathUtils.approximateLog10SumLog10(tempLog10ConformationLikelihoods[j], numPaths);
|
||||
set.log10Likelihoods[j] = log10Max - logDenominator;
|
||||
}
|
||||
}
|
||||
|
|
@ -415,10 +385,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
|
||||
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
|
||||
int AC = set.ACcounts.getCounts()[i];
|
||||
result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
|
||||
result.log10AlleleFrequencyLikelihoods[i][AC] = MathUtils.approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
|
||||
|
||||
final double prior = log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
|
||||
result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
|
||||
result.log10AlleleFrequencyPosteriors[i][AC] = MathUtils.approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -564,7 +534,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
lastK = k;
|
||||
maxLog10L = Math.max(maxLog10L, log10LofK);
|
||||
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
||||
if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
||||
//if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
||||
done = true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
@ -72,25 +73,28 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
|
|||
this.logger = logger;
|
||||
}
|
||||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
*
|
||||
* @param tracker rod data
|
||||
* @param ref reference context
|
||||
* @param contexts stratified alignment contexts
|
||||
* @param contextType stratified context type
|
||||
* @param priors priors to use for GLs
|
||||
* @param alternateAlleleToUse the alternate allele to use, null if not set
|
||||
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
|
||||
* @return variant context where genotypes are no-called but with GLs
|
||||
*/
|
||||
public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup);
|
||||
/**
|
||||
* Can be overridden by concrete subclasses
|
||||
*
|
||||
* @param tracker rod data
|
||||
* @param ref reference context
|
||||
* @param contexts stratified alignment contexts
|
||||
* @param contextType stratified context type
|
||||
* @param priors priors to use for GLs
|
||||
* @param alternateAlleleToUse the alternate allele to use, null if not set
|
||||
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
|
||||
* @param locParser Genome Loc Parser
|
||||
* @return variant context where genotypes are no-called but with GLs
|
||||
*/
|
||||
public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
|
||||
ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup,
|
||||
GenomeLocParser locParser);
|
||||
|
||||
|
||||
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||
int count = 0;
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Haplotype;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
|
@ -54,17 +55,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
private final boolean getAlleleListFromVCF;
|
||||
|
||||
private boolean DEBUG = false;
|
||||
|
||||
private final boolean doMultiAllelicCalls;
|
||||
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
|
||||
|
||||
private final int maxAlternateAlleles;
|
||||
private PairHMMIndelErrorModel pairModel;
|
||||
|
||||
private static ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>> indelLikelihoodMap =
|
||||
new ThreadLocal<HashMap<PileupElement,LinkedHashMap<Allele,Double>>>() {
|
||||
protected synchronized HashMap<PileupElement,LinkedHashMap<Allele,Double>> initialValue() {
|
||||
return new HashMap<PileupElement,LinkedHashMap<Allele,Double>>();
|
||||
}
|
||||
};
|
||||
protected synchronized HashMap<PileupElement,LinkedHashMap<Allele,Double>> initialValue() {
|
||||
return new HashMap<PileupElement,LinkedHashMap<Allele,Double>>();
|
||||
}
|
||||
};
|
||||
|
||||
private LinkedHashMap<Allele,Haplotype> haplotypeMap;
|
||||
|
||||
|
|
@ -81,12 +82,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.BANDED_INDEL_COMPUTATION);
|
||||
UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
|
||||
alleleList = new ArrayList<Allele>();
|
||||
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
|
||||
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
|
||||
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
|
||||
maxAlternateAlleles = UAC.MAX_ALTERNATE_ALLELES;
|
||||
doMultiAllelicCalls = UAC.MULTI_ALLELIC;
|
||||
|
||||
haplotypeMap = new LinkedHashMap<Allele,Haplotype>();
|
||||
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
|
||||
|
|
@ -95,7 +98,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
|
||||
Map<String, AlignmentContext> contexts,
|
||||
AlignmentContextUtils.ReadOrientation contextType) {
|
||||
AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) {
|
||||
Allele refAllele=null, altAllele=null;
|
||||
GenomeLoc loc = ref.getLocus();
|
||||
ArrayList<Allele> aList = new ArrayList<Allele>();
|
||||
|
|
@ -114,7 +117,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
|
||||
return aList;
|
||||
|
||||
|
||||
for ( Map.Entry<String, AlignmentContext> sample : contexts.entrySet() ) {
|
||||
// todo -- warning, can be duplicating expensive partition here
|
||||
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
|
||||
|
|
@ -126,9 +129,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) {
|
||||
//SAMRecord read = p.getRead();
|
||||
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
|
||||
GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
|
||||
if (read == null)
|
||||
continue;
|
||||
continue;
|
||||
if(ReadUtils.is454Read(read)) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -208,63 +211,69 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
}
|
||||
}
|
||||
|
||||
/* if (DEBUG) {
|
||||
int icount = indelPileup.getNumberOfInsertions();
|
||||
int dcount = indelPileup.getNumberOfDeletions();
|
||||
if (icount + dcount > 0)
|
||||
{
|
||||
List<Pair<String,Integer>> eventStrings = indelPileup.getEventStringsWithCounts(ref.getBases());
|
||||
System.out.format("#ins: %d, #del:%d\n", insCount, delCount);
|
||||
|
||||
for (int i=0 ; i < eventStrings.size() ; i++ ) {
|
||||
System.out.format("%s:%d,",eventStrings.get(i).first,eventStrings.get(i).second);
|
||||
// int k=0;
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
} */
|
||||
}
|
||||
|
||||
Collection<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
int maxAlleleCnt = 0;
|
||||
String bestAltAllele = "";
|
||||
|
||||
for (String s : consensusIndelStrings.keySet()) {
|
||||
int curCnt = consensusIndelStrings.get(s);
|
||||
if (curCnt > maxAlleleCnt) {
|
||||
maxAlleleCnt = curCnt;
|
||||
bestAltAllele = s;
|
||||
int curCnt = consensusIndelStrings.get(s), stop = 0;
|
||||
// if observed count if above minimum threshold, we will genotype this allele
|
||||
if (curCnt < minIndelCountForGenotyping)
|
||||
continue;
|
||||
|
||||
if (s.startsWith("D")) {
|
||||
// get deletion length
|
||||
int dLen = Integer.valueOf(s.substring(1));
|
||||
// get ref bases of accurate deletion
|
||||
int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
|
||||
stop = loc.getStart() + dLen;
|
||||
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
|
||||
|
||||
if (Allele.acceptableAlleleBases(refBases)) {
|
||||
refAllele = Allele.create(refBases,true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// insertion case
|
||||
if (Allele.acceptableAlleleBases(s)) {
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(s, false);
|
||||
stop = loc.getStart();
|
||||
}
|
||||
}
|
||||
// if (DEBUG)
|
||||
// System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) );
|
||||
} //gdebug-
|
||||
|
||||
if (maxAlleleCnt < minIndelCountForGenotyping)
|
||||
return aList;
|
||||
|
||||
if (bestAltAllele.startsWith("D")) {
|
||||
// get deletion length
|
||||
int dLen = Integer.valueOf(bestAltAllele.substring(1));
|
||||
// get ref bases of accurate deletion
|
||||
int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
|
||||
ArrayList vcAlleles = new ArrayList<Allele>();
|
||||
vcAlleles.add(refAllele);
|
||||
vcAlleles.add(altAllele);
|
||||
|
||||
//System.out.println(new String(ref.getBases()));
|
||||
byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder().source("");
|
||||
builder.loc(loc.getContig(), loc.getStart(), stop);
|
||||
builder.alleles(vcAlleles);
|
||||
builder.referenceBaseForIndel(ref.getBase());
|
||||
builder.noGenotypes();
|
||||
if (doMultiAllelicCalls)
|
||||
vcs.add(builder.make());
|
||||
else {
|
||||
if (curCnt > maxAlleleCnt) {
|
||||
maxAlleleCnt = curCnt;
|
||||
vcs.clear();
|
||||
vcs.add(builder.make());
|
||||
}
|
||||
|
||||
if (Allele.acceptableAlleleBases(refBases)) {
|
||||
refAllele = Allele.create(refBases,true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// insertion case
|
||||
if (Allele.acceptableAlleleBases(bestAltAllele)) {
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(bestAltAllele, false);
|
||||
}
|
||||
}
|
||||
if (refAllele != null && altAllele != null) {
|
||||
aList.add(0,refAllele);
|
||||
aList.add(1,altAllele);
|
||||
}
|
||||
|
||||
if (vcs.isEmpty())
|
||||
return aList; // nothing else to do, no alleles passed minimum count criterion
|
||||
|
||||
VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
|
||||
|
||||
aList = new ArrayList<Allele>(mergedVC.getAlleles());
|
||||
|
||||
return aList;
|
||||
|
||||
}
|
||||
|
|
@ -277,7 +286,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
AlignmentContextUtils.ReadOrientation contextType,
|
||||
GenotypePriors priors,
|
||||
Allele alternateAlleleToUse,
|
||||
boolean useBAQedPileup) {
|
||||
boolean useBAQedPileup, GenomeLocParser locParser) {
|
||||
|
||||
if ( tracker == null )
|
||||
return null;
|
||||
|
|
@ -294,17 +303,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
haplotypeMap.clear();
|
||||
|
||||
if (getAlleleListFromVCF) {
|
||||
for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
|
||||
if( vc_input != null &&
|
||||
allowableTypes.contains(vc_input.getType()) &&
|
||||
ref.getLocus().getStart() == vc_input.getStart()) {
|
||||
vc = vc_input;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// ignore places where we don't have a variant
|
||||
if ( vc == null )
|
||||
return null;
|
||||
for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
|
||||
if( vc_input != null &&
|
||||
allowableTypes.contains(vc_input.getType()) &&
|
||||
ref.getLocus().getStart() == vc_input.getStart()) {
|
||||
vc = vc_input;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// ignore places where we don't have a variant
|
||||
if ( vc == null )
|
||||
return null;
|
||||
|
||||
alleleList.clear();
|
||||
if (ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
|
|
@ -323,7 +332,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
}
|
||||
else {
|
||||
alleleList = computeConsensusAlleles(ref,contexts, contextType);
|
||||
alleleList = computeConsensusAlleles(ref,contexts, contextType, locParser);
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
}
|
||||
|
|
@ -340,7 +349,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
|
||||
if (alleleList.isEmpty())
|
||||
return null;
|
||||
|
||||
|
||||
refAllele = alleleList.get(0);
|
||||
altAllele = alleleList.get(1);
|
||||
|
||||
|
|
|
|||
|
|
@ -30,10 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
|
@ -66,7 +63,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
final AlignmentContextUtils.ReadOrientation contextType,
|
||||
final GenotypePriors priors,
|
||||
final Allele alternateAlleleToUse,
|
||||
final boolean useBAQedPileup) {
|
||||
final boolean useBAQedPileup,
|
||||
final GenomeLocParser locParser) {
|
||||
|
||||
if ( !(priors instanceof DiploidSNPGenotypePriors) )
|
||||
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
|
||||
|
|
|
|||
|
|
@ -204,6 +204,6 @@ public class UGBoundAF extends RodWalker<VariantContext,Integer> {
|
|||
return Math.log10(s_2 + (s_2 - s)/15.0);
|
||||
}
|
||||
|
||||
return ExactAFCalculationModel.approximateLog10SumLog10(simpAux(likelihoods,a,c,eps/2,s_l,fa,fc,fd,cap-1),simpAux(likelihoods, c, b, eps / 2, s_r, fc, fb, fe, cap - 1));
|
||||
return MathUtils.approximateLog10SumLog10(simpAux(likelihoods,a,c,eps/2,s_l,fa,fc,fd,cap-1),simpAux(likelihoods, c, b, eps / 2, s_r, fc, fb, fe, cap - 1));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ public class UnifiedArgumentCollection {
|
|||
* For advanced users only.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles (SNPs only)", required = false)
|
||||
@Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles", required = false)
|
||||
public boolean MULTI_ALLELIC = false;
|
||||
|
||||
/**
|
||||
|
|
@ -146,8 +146,8 @@ public class UnifiedArgumentCollection {
|
|||
public int INDEL_HAPLOTYPE_SIZE = 80;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "bandedIndel", shortName = "bandedIndel", doc = "Banded Indel likelihood computation", required = false)
|
||||
public boolean BANDED_INDEL_COMPUTATION = false;
|
||||
@Argument(fullName = "noBandedIndel", shortName = "noBandedIndel", doc = "Don't do Banded Indel likelihood computation", required = false)
|
||||
public boolean DONT_DO_BANDED_INDEL_COMPUTATION = false;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
|
||||
|
|
@ -184,7 +184,7 @@ public class UnifiedArgumentCollection {
|
|||
|
||||
// todo- arguments to remove
|
||||
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
||||
uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
|
||||
uac.DONT_DO_BANDED_INDEL_COMPUTATION = DONT_DO_BANDED_INDEL_COMPUTATION;
|
||||
uac.MULTI_ALLELIC = MULTI_ALLELIC;
|
||||
return uac;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -243,7 +243,7 @@ public class UnifiedGenotyperEngine {
|
|||
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
|
||||
}
|
||||
|
||||
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
|
||||
return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
|
||||
}
|
||||
|
||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ import java.util.*;
|
|||
public class VariantSummary extends VariantEvaluator implements StandardEval {
|
||||
final protected static Logger logger = Logger.getLogger(VariantSummary.class);
|
||||
|
||||
/** Indels with size greater than this value are tallied in the CNV column */
|
||||
private final static int MAX_INDEL_LENGTH = 50;
|
||||
private final static double MIN_CNV_OVERLAP = 0.5;
|
||||
private VariantEvalWalker walker;
|
||||
|
|
@ -196,14 +197,16 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
|
||||
private final boolean overlapsKnownCNV(VariantContext cnv) {
|
||||
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
|
||||
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
|
||||
if ( knownCNVs != null ) {
|
||||
final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
|
||||
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
|
||||
|
||||
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
|
||||
while ( nodeIt.hasNext() ) {
|
||||
final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
|
||||
if ( overlapP > MIN_CNV_OVERLAP )
|
||||
return true;
|
||||
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
|
||||
while ( nodeIt.hasNext() ) {
|
||||
final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
|
||||
if ( overlapP > MIN_CNV_OVERLAP )
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
@ -224,7 +227,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
allVariantCounts.inc(type, ALL);
|
||||
|
||||
// type specific calculations
|
||||
if ( type == Type.SNP ) {
|
||||
if ( type == Type.SNP && eval.isBiallelic() ) {
|
||||
titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
|
||||
titvTable.inc(type, ALL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -70,17 +70,18 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
" * Short name of %1$s%n" +
|
||||
" * @return Short name of %1$s%n" +
|
||||
" */%n" +
|
||||
"def %3$s = this.%1$s%n" +
|
||||
"%5$sdef %3$s = this.%1$s%n" +
|
||||
"%n" +
|
||||
"/**%n" +
|
||||
" * Short name of %1$s%n" +
|
||||
" * @param value Short name of %1$s%n" +
|
||||
" */%n" +
|
||||
"def %4$s(value: %2$s) { this.%1$s = value }%n",
|
||||
"%5$sdef %4$s(value: %2$s) { this.%1$s = value }%n",
|
||||
getFieldName(),
|
||||
getFieldType(),
|
||||
getShortFieldGetter(),
|
||||
getShortFieldSetter());
|
||||
getShortFieldSetter(),
|
||||
getPrivacy());
|
||||
}
|
||||
|
||||
protected static final String REQUIRED_TEMPLATE = " + required(\"%1$s\", %3$s, spaceSeparated=true, escape=true, format=%2$s)";
|
||||
|
|
@ -135,11 +136,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
new IntervalFileArgumentField(argumentDefinition),
|
||||
new IntervalStringArgumentField(argumentDefinition));
|
||||
|
||||
// ROD Bindings are set by the RodBindField
|
||||
} else if (RodBindArgumentField.ROD_BIND_FIELD.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
|
||||
// TODO: Once everyone is using @Allows and @Requires correctly, we can stop blindly allowing Triplets
|
||||
return Arrays.asList(new RodBindArgumentField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, Tribble.STANDARD_INDEX_EXTENSION));
|
||||
//return Collections.<ArgumentField>emptyList();
|
||||
} else if (NumThreadsArgumentField.NUM_THREADS_FIELD.equals(argumentDefinition.fullName)) {
|
||||
return Arrays.asList(new NumThreadsArgumentField(argumentDefinition));
|
||||
|
||||
} else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
|
||||
return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam"));
|
||||
|
|
@ -166,10 +164,13 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
|
||||
fields.add(new OutputArgumentField(argumentDefinition, gatherClass));
|
||||
|
||||
if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType))
|
||||
if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)) {
|
||||
fields.add(new SAMFileWriterIndexArgumentField(argumentDefinition));
|
||||
else if (VCFWriter.class.isAssignableFrom(argumentDefinition.argumentType))
|
||||
fields.add(new SAMFileWriterMD5ArgumentField(argumentDefinition));
|
||||
}
|
||||
else if (VCFWriter.class.isAssignableFrom(argumentDefinition.argumentType)) {
|
||||
fields.add(new VCFWriterIndexArgumentField(argumentDefinition));
|
||||
}
|
||||
|
||||
return fields;
|
||||
|
||||
|
|
@ -228,7 +229,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
@Override protected String getRawFieldName() { return super.getRawFieldName() + "String"; }
|
||||
@Override protected String getFullName() { return super.getFullName() + "String"; }
|
||||
@Override protected String getRawShortFieldName() { return super.getRawShortFieldName() + "String"; }
|
||||
@Override protected String getFieldType() { return "List[String]"; }
|
||||
@Override protected String getFieldType() { return "Seq[String]"; }
|
||||
@Override protected String getDefaultValue() { return "Nil"; }
|
||||
@Override public String getCommandLineTemplate() { return REPEAT_TEMPLATE; }
|
||||
|
||||
|
|
@ -250,7 +251,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
}
|
||||
|
||||
@Override protected Class<?> getInnerType() { return File.class; }
|
||||
@Override protected String getFieldType() { return isMultiValued() ? "List[File]" : "File"; }
|
||||
@Override protected String getFieldType() { return isMultiValued() ? "Seq[File]" : "File"; }
|
||||
@Override protected String getDefaultValue() { return isMultiValued() ? "Nil" : "_"; }
|
||||
}
|
||||
|
||||
|
|
@ -294,7 +295,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
}
|
||||
|
||||
@Override protected Class<?> getInnerType() { return mapType(argumentDefinition.componentType); }
|
||||
@Override protected String getFieldType() { return String.format("List[%s]", getType(getInnerType())); }
|
||||
@Override protected String getFieldType() { return String.format("Seq[%s]", getType(getInnerType())); }
|
||||
@Override protected String getDefaultValue() { return "Nil"; }
|
||||
@Override protected String getCommandLineTemplate() { return REPEAT_TEMPLATE; }
|
||||
}
|
||||
|
|
@ -336,17 +337,16 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
}
|
||||
|
||||
// Allows the user to specify the track name, track type, and the file.
|
||||
public static class RodBindArgumentField extends ArgumentDefinitionField {
|
||||
public static final String ROD_BIND_FIELD = "rodBind";
|
||||
public static class NumThreadsArgumentField extends OptionedArgumentField {
|
||||
public static final String NUM_THREADS_FIELD = "num_threads";
|
||||
|
||||
public RodBindArgumentField(ArgumentDefinition argumentDefinition) {
|
||||
super(argumentDefinition);
|
||||
public NumThreadsArgumentField(ArgumentDefinition argumentDefinition) {
|
||||
super(argumentDefinition, false);
|
||||
}
|
||||
@Override protected Class<?> getInnerType() { return null; } // RodBind does not need to be imported.
|
||||
@Override protected String getFieldType() { return "List[RodBind]"; }
|
||||
@Override protected String getDefaultValue() { return "Nil"; }
|
||||
@Override protected String getCommandLineTemplate() {
|
||||
return " + repeat(\"%1$s\", %3$s, formatPrefix=RodBind.formatCommandLineParameter, spaceSeparated=true, escape=true, format=%2$s)";
|
||||
|
||||
@Override
|
||||
protected String getFreezeFields() {
|
||||
return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%n");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -356,7 +356,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
super(argumentDefinition);
|
||||
}
|
||||
@Override protected Class<?> getInnerType() { return null; } // TaggedFile does not need to be imported.
|
||||
@Override protected String getFieldType() { return argumentDefinition.isMultiValued ? "List[File]" : "File"; }
|
||||
@Override protected String getFieldType() { return argumentDefinition.isMultiValued ? "Seq[File]" : "File"; }
|
||||
@Override protected String getDefaultValue() { return argumentDefinition.isMultiValued ? "Nil" : "_"; }
|
||||
@Override protected String getCommandLineTemplate() {
|
||||
if (argumentDefinition.isMultiValued) {
|
||||
|
|
@ -395,10 +395,11 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
}
|
||||
@Override protected String getFullName() { return this.indexFieldName; }
|
||||
@Override protected boolean isRequired() { return false; }
|
||||
@Override protected String getFieldType() { return "List[File]"; }
|
||||
@Override protected String getFieldType() { return "Seq[File]"; }
|
||||
@Override protected String getDefaultValue() { return "Nil"; }
|
||||
@Override protected Class<?> getInnerType() { return File.class; }
|
||||
@Override protected String getRawFieldName() { return this.indexFieldName; }
|
||||
@Override protected String getPrivacy() { return "private "; }
|
||||
@Override protected String getFreezeFields() {
|
||||
if (originalIsMultiValued) {
|
||||
if (originalSuffix == null) {
|
||||
|
|
@ -434,53 +435,69 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
|
|||
}
|
||||
}
|
||||
|
||||
// Tracks an automatically generated index
|
||||
private static abstract class OutputIndexArgumentField extends ArgumentField {
|
||||
protected final String indexFieldName;
|
||||
// Tracks an automatically generated index, md5, etc.
|
||||
private static abstract class AuxilliaryOutputArgumentField extends ArgumentField {
|
||||
protected final String originalFieldName;
|
||||
public OutputIndexArgumentField(ArgumentDefinition originalArgumentDefinition) {
|
||||
this.indexFieldName = originalArgumentDefinition.fullName + "Index";
|
||||
protected final String auxFieldName;
|
||||
protected final String auxFieldLabel;
|
||||
public AuxilliaryOutputArgumentField(ArgumentDefinition originalArgumentDefinition, String auxFieldLabel) {
|
||||
this.originalFieldName = originalArgumentDefinition.fullName;
|
||||
this.auxFieldName = originalArgumentDefinition.fullName + auxFieldLabel;
|
||||
this.auxFieldLabel = auxFieldLabel;
|
||||
}
|
||||
@Override protected Class<? extends Annotation> getAnnotationIOClass() { return Output.class; }
|
||||
@Override public String getCommandLineAddition() { return ""; }
|
||||
@Override protected String getDoc() { return "Automatically generated index for " + this.originalFieldName; }
|
||||
@Override protected String getFullName() { return this.indexFieldName; }
|
||||
@Override protected String getDoc() { return String.format("Automatically generated %s for %s", auxFieldLabel.toLowerCase(), this.originalFieldName); }
|
||||
@Override protected String getFullName() { return this.auxFieldName; }
|
||||
@Override protected boolean isRequired() { return false; }
|
||||
@Override protected String getFieldType() { return "File"; }
|
||||
@Override protected String getDefaultValue() { return "_"; }
|
||||
@Override protected Class<?> getInnerType() { return File.class; }
|
||||
@Override protected String getRawFieldName() { return this.indexFieldName; }
|
||||
@Override protected String getRawFieldName() { return this.auxFieldName; }
|
||||
@Override protected String getPrivacy() { return "private "; }
|
||||
|
||||
@Override public boolean isGather() { return true; }
|
||||
@Override protected String getGatherAnnotation() {
|
||||
return String.format("@Gather(classOf[AutoIndexGatherFunction])%n");
|
||||
return String.format("@Gather(enabled=false)%n");
|
||||
}
|
||||
}
|
||||
|
||||
private static class VCFWriterIndexArgumentField extends OutputIndexArgumentField {
|
||||
private static class VCFWriterIndexArgumentField extends AuxilliaryOutputArgumentField {
|
||||
public VCFWriterIndexArgumentField(ArgumentDefinition originalArgumentDefinition) {
|
||||
super(originalArgumentDefinition);
|
||||
super(originalArgumentDefinition, "Index");
|
||||
}
|
||||
@Override protected String getFreezeFields() {
|
||||
return String.format(
|
||||
("if (%2$s != null)%n" +
|
||||
" if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(%2$s.getPath))%n" +
|
||||
" %1$s = new File(%2$s.getPath + \"%3$s\")%n"),
|
||||
indexFieldName, originalFieldName, Tribble.STANDARD_INDEX_EXTENSION);
|
||||
auxFieldName, originalFieldName, Tribble.STANDARD_INDEX_EXTENSION);
|
||||
}
|
||||
}
|
||||
|
||||
private static class SAMFileWriterIndexArgumentField extends OutputIndexArgumentField {
|
||||
private static class SAMFileWriterIndexArgumentField extends AuxilliaryOutputArgumentField {
|
||||
public SAMFileWriterIndexArgumentField(ArgumentDefinition originalArgumentDefinition) {
|
||||
super(originalArgumentDefinition);
|
||||
super(originalArgumentDefinition, "Index");
|
||||
}
|
||||
@Override protected String getFreezeFields() {
|
||||
return String.format(
|
||||
("if (%2$s != null)%n" +
|
||||
" if (!%3$s)%n" +
|
||||
" %1$s = new File(%2$s.getPath.stripSuffix(\".bam\") + \"%4$s\")%n"),
|
||||
indexFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix);
|
||||
auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix);
|
||||
}
|
||||
}
|
||||
|
||||
private static class SAMFileWriterMD5ArgumentField extends AuxilliaryOutputArgumentField {
|
||||
public SAMFileWriterMD5ArgumentField(ArgumentDefinition originalArgumentDefinition) {
|
||||
super(originalArgumentDefinition, "MD5");
|
||||
}
|
||||
@Override protected String getFreezeFields() {
|
||||
return String.format(
|
||||
("if (%2$s != null)%n" +
|
||||
" if (%3$s)%n" +
|
||||
" %1$s = new File(%2$s.getPath + \"%4$s\")%n"),
|
||||
auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME, ".md5");
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -56,7 +56,7 @@ public abstract class ArgumentField {
|
|||
return String.format("%n" +
|
||||
"/** %s */%n" +
|
||||
"@%s(fullName=\"%s\", shortName=\"%s\", doc=\"%s\", required=%s, exclusiveOf=\"%s\", validation=\"%s\")%n" +
|
||||
"%svar %s: %s = %s%n" +
|
||||
"%s%svar %s: %s = %s%n" +
|
||||
"%s",
|
||||
getDoc(),
|
||||
getAnnotationIOClass().getSimpleName(),
|
||||
|
|
@ -66,7 +66,7 @@ public abstract class ArgumentField {
|
|||
isRequired(),
|
||||
getExclusiveOf(),
|
||||
getValidation(),
|
||||
getGatherAnnotation(), getFieldName(), getFieldType(), getDefaultValue(),
|
||||
getGatherAnnotation(), getPrivacy(), getFieldName(), getFieldType(), getDefaultValue(),
|
||||
getDefineAddition());
|
||||
}
|
||||
|
||||
|
|
@ -143,6 +143,9 @@ public abstract class ArgumentField {
|
|||
/** @return True if this field uses @Gather. */
|
||||
public boolean isGather() { return false; }
|
||||
|
||||
/** @return Privacy for the field. */
|
||||
protected String getPrivacy() { return ""; }
|
||||
|
||||
/** @return The raw field name, which will be checked against scala build in types. */
|
||||
protected abstract String getRawFieldName();
|
||||
/** @return The field name checked against reserved words. */
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -34,13 +34,11 @@ import org.broadinstitute.sting.commandline.ParsingEngine;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
|
|
@ -85,7 +83,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
"%n" +
|
||||
"/** A dynamicly generated list of classes that the GATK Extensions depend on, but are not be detected by default by BCEL. */%n" +
|
||||
"class %s {%n" +
|
||||
"val types = List(%n%s)%n" +
|
||||
"val types = Seq(%n%s)%n" +
|
||||
"}%n";
|
||||
|
||||
@Output(fullName="output_directory", shortName="outDir", doc="Directory to output the generated scala", required=true)
|
||||
|
|
@ -95,10 +93,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();
|
||||
WalkerManager walkerManager = new WalkerManager();
|
||||
FilterManager filterManager = new FilterManager();
|
||||
// HACK: We're currently relying on the fact that RMDTrackBuilder is used only from RMD type lookups, not
|
||||
// RMD track location. Therefore, no sequence dictionary is required. In the future, we should separate
|
||||
// RMD track lookups from track creation.
|
||||
RMDTrackBuilder trackBuilder = new RMDTrackBuilder(null,null,ValidationExclusion.TYPE.ALL);
|
||||
|
||||
/**
|
||||
* Required main method implementation.
|
||||
|
|
@ -147,7 +141,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
String clpConstructor = String.format("analysisName = \"%s\"%njavaMainClass = \"%s\"%n", clpClassName, clp.getName());
|
||||
|
||||
writeClass("org.broadinstitute.sting.queue.function.JavaCommandLineFunction", clpClassName,
|
||||
false, clpConstructor, ArgumentDefinitionField.getArgumentFields(parser,clp), dependents, false);
|
||||
false, clpConstructor, ArgumentDefinitionField.getArgumentFields(parser,clp), dependents);
|
||||
|
||||
if (clp == CommandLineGATK.class) {
|
||||
for (Entry<String, Collection<Class<? extends Walker>>> walkersByPackage: walkerManager.getWalkerNamesByPackage(false).entrySet()) {
|
||||
|
|
@ -169,7 +163,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
}
|
||||
|
||||
writeClass(GATK_EXTENSIONS_PACKAGE_NAME + "." + clpClassName, walkerName,
|
||||
isScatter, constructor, argumentFields, dependents, true);
|
||||
isScatter, constructor, argumentFields, dependents);
|
||||
} catch (Exception e) {
|
||||
throw new ReviewedStingException("Error generating wrappers for walker " + walkerType, e);
|
||||
}
|
||||
|
|
@ -242,8 +236,8 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
*/
|
||||
private void writeClass(String baseClass, String className, boolean isScatter,
|
||||
String constructor, List<? extends ArgumentField> argumentFields,
|
||||
Set<Class<?>> dependents, boolean isGATKWalker) throws IOException {
|
||||
String content = getContent(CLASS_TEMPLATE, baseClass, className, constructor, isScatter, "", argumentFields, dependents, isGATKWalker);
|
||||
Set<Class<?>> dependents) throws IOException {
|
||||
String content = getContent(CLASS_TEMPLATE, baseClass, className, constructor, isScatter, "", argumentFields, dependents);
|
||||
writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
|
||||
}
|
||||
|
||||
|
|
@ -257,7 +251,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
*/
|
||||
private void writeFilter(String className, List<? extends ArgumentField> argumentFields, Set<Class<?>> dependents) throws IOException {
|
||||
String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction",
|
||||
className, "", false, String.format(" + \" -read_filter %s\"", className), argumentFields, dependents, false);
|
||||
className, "", false, String.format(" + \" -read_filter %s\"", className), argumentFields, dependents);
|
||||
writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
|
||||
}
|
||||
|
||||
|
|
@ -351,8 +345,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
*/
|
||||
private static String getContent(String scalaTemplate, String baseClass, String className,
|
||||
String constructor, boolean isScatter, String commandLinePrefix,
|
||||
List<? extends ArgumentField> argumentFields, Set<Class<?>> dependents,
|
||||
boolean isGATKWalker) {
|
||||
List<? extends ArgumentField> argumentFields, Set<Class<?>> dependents) {
|
||||
StringBuilder arguments = new StringBuilder();
|
||||
StringBuilder commandLine = new StringBuilder(commandLinePrefix);
|
||||
|
||||
|
|
@ -376,9 +369,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
if (isGather)
|
||||
importSet.add("import org.broadinstitute.sting.commandline.Gather");
|
||||
|
||||
// Needed for ShellUtils.escapeShellArgument()
|
||||
importSet.add("import org.broadinstitute.sting.queue.util.ShellUtils");
|
||||
|
||||
// Sort the imports so that the are always in the same order.
|
||||
List<String> sortedImports = new ArrayList<String>(importSet);
|
||||
Collections.sort(sortedImports);
|
||||
|
|
@ -386,10 +376,8 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
|
|||
StringBuffer freezeFieldOverride = new StringBuffer();
|
||||
for (String freezeField: freezeFields)
|
||||
freezeFieldOverride.append(freezeField);
|
||||
if (freezeFieldOverride.length() > 0 || isGATKWalker) {
|
||||
freezeFieldOverride.insert(0, String.format("override def freezeFieldValues = {%nsuper.freezeFieldValues%n"));
|
||||
if ( isGATKWalker )
|
||||
freezeFieldOverride.append(String.format("if ( num_threads.isDefined ) nCoresRequest = num_threads%n"));
|
||||
if (freezeFieldOverride.length() > 0) {
|
||||
freezeFieldOverride.insert(0, String.format("override def freezeFieldValues() {%nsuper.freezeFieldValues()%n"));
|
||||
freezeFieldOverride.append(String.format("}%n%n"));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -145,7 +145,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
}
|
||||
|
||||
return new GenomeLoc(getContig(), this.contigIndex,
|
||||
Math.min(getStart(), that.getStart()),
|
||||
Math.min( getStart(), that.getStart() ),
|
||||
Math.max( getStop(), that.getStop()) );
|
||||
}
|
||||
|
||||
|
|
@ -465,4 +465,8 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
private final static double overlapPercent(final GenomeLoc gl1, final GenomeLoc gl2) {
|
||||
return (1.0 * gl1.intersect(gl2).size()) / gl1.size();
|
||||
}
|
||||
|
||||
public long sizeOfOverlap( final GenomeLoc that ) {
|
||||
return ( this.overlapsP(that) ? Math.min( getStop(), that.getStop() ) - Math.max( getStart(), that.getStart() ) : 0L );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,17 +56,62 @@ public class MathUtils {
|
|||
private MathUtils() {
|
||||
}
|
||||
|
||||
@Requires({"d > 0.0"})
|
||||
public static int fastPositiveRound(double d) {
|
||||
return (int) (d + 0.5);
|
||||
// A fast implementation of the Math.round() method. This method does not perform
|
||||
// under/overflow checking, so this shouldn't be used in the general case (but is fine
|
||||
// if one is already make those checks before calling in to the rounding).
|
||||
public static int fastRound(double d) {
|
||||
return (d > 0) ? (int)(d + 0.5d) : (int)(d - 0.5d);
|
||||
}
|
||||
|
||||
public static int fastRound(double d) {
|
||||
if (d > 0.0) {
|
||||
return fastPositiveRound(d);
|
||||
} else {
|
||||
return -1 * fastPositiveRound(-1 * d);
|
||||
public static double approximateLog10SumLog10(final double[] vals) {
|
||||
return approximateLog10SumLog10(vals, vals.length);
|
||||
}
|
||||
|
||||
public static double approximateLog10SumLog10(final double[] vals, final int endIndex) {
|
||||
|
||||
final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex);
|
||||
double approxSum = vals[maxElementIndex];
|
||||
if ( approxSum == Double.NEGATIVE_INFINITY )
|
||||
return approxSum;
|
||||
|
||||
for ( int i = 0; i < endIndex; i++ ) {
|
||||
if ( i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY )
|
||||
continue;
|
||||
|
||||
final double diff = approxSum - vals[i];
|
||||
if ( diff < MathUtils.MAX_JACOBIAN_TOLERANCE ) {
|
||||
// See notes from the 2-inout implementation below
|
||||
final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding
|
||||
approxSum += MathUtils.jacobianLogTable[ind];
|
||||
}
|
||||
}
|
||||
|
||||
return approxSum;
|
||||
}
|
||||
|
||||
public static double approximateLog10SumLog10(double small, double big) {
|
||||
// make sure small is really the smaller value
|
||||
if ( small > big ) {
|
||||
final double t = big;
|
||||
big = small;
|
||||
small = t;
|
||||
}
|
||||
|
||||
if ( small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
|
||||
return big;
|
||||
|
||||
final double diff = big - small;
|
||||
if ( diff >= MathUtils.MAX_JACOBIAN_TOLERANCE )
|
||||
return big;
|
||||
|
||||
// OK, so |y-x| < tol: we use the following identity then:
|
||||
// we need to compute log10(10^x + 10^y)
|
||||
// By Jacobian logarithm identity, this is equal to
|
||||
// max(x,y) + log10(1+10^-abs(x-y))
|
||||
// we compute the second term as a table lookup with integer quantization
|
||||
// we have pre-stored correction for 0,0.1,0.2,... 10.0
|
||||
final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding
|
||||
return big + MathUtils.jacobianLogTable[ind];
|
||||
}
|
||||
|
||||
public static double sum(Collection<Number> numbers) {
|
||||
|
|
@ -541,11 +586,15 @@ public class MathUtils {
|
|||
return normalizeFromLog10(array, false);
|
||||
}
|
||||
|
||||
public static int maxElementIndex(double[] array) {
|
||||
public static int maxElementIndex(final double[] array) {
|
||||
return maxElementIndex(array, array.length);
|
||||
}
|
||||
|
||||
public static int maxElementIndex(final double[] array, final int endIndex) {
|
||||
if (array == null) throw new IllegalArgumentException("Array cannot be null!");
|
||||
|
||||
int maxI = -1;
|
||||
for (int i = 0; i < array.length; i++) {
|
||||
for (int i = 0; i < endIndex; i++) {
|
||||
if (maxI == -1 || array[i] > array[maxI])
|
||||
maxI = i;
|
||||
}
|
||||
|
|
@ -553,11 +602,15 @@ public class MathUtils {
|
|||
return maxI;
|
||||
}
|
||||
|
||||
public static int maxElementIndex(int[] array) {
|
||||
public static int maxElementIndex(final int[] array) {
|
||||
return maxElementIndex(array, array.length);
|
||||
}
|
||||
|
||||
public static int maxElementIndex(final int[] array, int endIndex) {
|
||||
if (array == null) throw new IllegalArgumentException("Array cannot be null!");
|
||||
|
||||
int maxI = -1;
|
||||
for (int i = 0; i < array.length; i++) {
|
||||
for (int i = 0; i < endIndex; i++) {
|
||||
if (maxI == -1 || array[i] > array[maxI])
|
||||
maxI = i;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,19 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 1/4/12
|
||||
*/
|
||||
|
||||
public class ActiveRead {
|
||||
final public GATKSAMRecord read;
|
||||
final public boolean isPrimaryRegion;
|
||||
|
||||
ActiveRead( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
|
||||
this.read = read;
|
||||
this.isPrimaryRegion = isPrimaryRegion;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
package org.broadinstitute.sting.utils.activeregion;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 1/4/12
|
||||
*/
|
||||
|
||||
public class ActiveRegion implements HasGenomeLocation {
|
||||
|
||||
private final ArrayList<ActiveRead> reads = new ArrayList<ActiveRead>();
|
||||
private byte[] reference = null;
|
||||
private final GenomeLoc loc;
|
||||
private GenomeLoc referenceLoc = null;
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
public final boolean isActive;
|
||||
|
||||
public ActiveRegion( final GenomeLoc loc, final boolean isActive, final GenomeLocParser genomeLocParser ) {
|
||||
this.loc = loc;
|
||||
this.isActive = isActive;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
referenceLoc = loc;
|
||||
}
|
||||
|
||||
// add each read to the bin and extend the reference genome loc if needed
|
||||
public void add( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
|
||||
referenceLoc = referenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
|
||||
reads.add( new ActiveRead(read, isPrimaryRegion) );
|
||||
}
|
||||
|
||||
public ArrayList<ActiveRead> getReads() { return reads; }
|
||||
|
||||
public byte[] getReference( final IndexedFastaSequenceFile referenceReader ) {
|
||||
// set up the reference if we haven't done so yet
|
||||
if ( reference == null ) {
|
||||
reference = referenceReader.getSubsequenceAt(referenceLoc.getContig(), referenceLoc.getStart(), referenceLoc.getStop()).getBases();
|
||||
}
|
||||
|
||||
return reference;
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
public GenomeLoc getReferenceLocation() { return referenceLoc; }
|
||||
|
||||
public int size() { return reads.size(); }
|
||||
}
|
||||
|
|
@ -27,10 +27,8 @@ package org.broadinstitute.sting.utils.codecs.vcf;
|
|||
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.PriorityBlockingQueue;
|
||||
|
||||
/**
|
||||
* This class writes VCF files, allowing records to be passed in unsorted.
|
||||
|
|
@ -39,20 +37,26 @@ import java.util.TreeSet;
|
|||
public abstract class SortingVCFWriterBase implements VCFWriter {
|
||||
|
||||
// The VCFWriter to which to actually write the sorted VCF records
|
||||
private VCFWriter innerWriter = null;
|
||||
private final VCFWriter innerWriter;
|
||||
|
||||
// the current queue of un-emitted records
|
||||
private PriorityQueue<VCFRecord> queue = null;
|
||||
private final Queue<VCFRecord> queue;
|
||||
|
||||
// The locus until which we are permitted to write out (inclusive)
|
||||
protected Integer mostUpstreamWritableLoc;
|
||||
protected static final int BEFORE_MOST_UPSTREAM_LOC = 0; // No real locus index is <= 0
|
||||
|
||||
// The set of chromosomes already passed over and to which it is forbidden to return
|
||||
private Set<String> finishedChromosomes = null;
|
||||
private final Set<String> finishedChromosomes;
|
||||
|
||||
// Should we call innerWriter.close() in close()
|
||||
private boolean takeOwnershipOfInner;
|
||||
private final boolean takeOwnershipOfInner;
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* create a local-sorting VCF writer, given an inner VCF writer to write to
|
||||
|
|
@ -62,16 +66,27 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
*/
|
||||
public SortingVCFWriterBase(VCFWriter innerWriter, boolean takeOwnershipOfInner) {
|
||||
this.innerWriter = innerWriter;
|
||||
this.queue = new PriorityQueue<VCFRecord>(50, new VariantContextComparator());
|
||||
this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
|
||||
this.finishedChromosomes = new TreeSet<String>();
|
||||
this.takeOwnershipOfInner = takeOwnershipOfInner;
|
||||
|
||||
// has to be PriorityBlockingQueue to be thread-safe
|
||||
// see http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic
|
||||
this.queue = new PriorityBlockingQueue<VCFRecord>(50, new VariantContextComparator());
|
||||
|
||||
this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
|
||||
}
|
||||
|
||||
public SortingVCFWriterBase(VCFWriter innerWriter) {
|
||||
this(innerWriter, false); // by default, don't own inner
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// public interface functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void writeHeader(VCFHeader header) {
|
||||
innerWriter.writeHeader(header);
|
||||
}
|
||||
|
|
@ -79,6 +94,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
/**
|
||||
* attempt to close the VCF file; we need to flush the queue first
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
stopWaitingToSort();
|
||||
|
||||
|
|
@ -86,27 +102,14 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
innerWriter.close();
|
||||
}
|
||||
|
||||
private void stopWaitingToSort() {
|
||||
emitRecords(true);
|
||||
mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
|
||||
}
|
||||
|
||||
protected void emitSafeRecords() {
|
||||
emitRecords(false);
|
||||
}
|
||||
|
||||
protected void noteCurrentRecord(VariantContext vc) {
|
||||
// did the user break the contract by giving a record too late?
|
||||
if (mostUpstreamWritableLoc != null && vc.getStart() < mostUpstreamWritableLoc) // went too far back, since may have already written anything that is <= mostUpstreamWritableLoc
|
||||
throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getChr() + ":" + vc.getStart() + " was just added.");
|
||||
}
|
||||
|
||||
/**
|
||||
* add a record to the file
|
||||
*
|
||||
* @param vc the Variant Context object
|
||||
*/
|
||||
public void add(VariantContext vc) {
|
||||
@Override
|
||||
public synchronized void add(VariantContext vc) {
|
||||
/* Note that the code below does not prevent the successive add()-ing of: (chr1, 10), (chr20, 200), (chr15, 100)
|
||||
since there is no implicit ordering of chromosomes:
|
||||
*/
|
||||
|
|
@ -125,7 +128,43 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
emitSafeRecords();
|
||||
}
|
||||
|
||||
private void emitRecords(boolean emitUnsafe) {
|
||||
/**
|
||||
* Gets a string representation of this object.
|
||||
* @return a string representation of this object
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getName();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// protected interface functions for subclasses to use
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private synchronized void stopWaitingToSort() {
|
||||
emitRecords(true);
|
||||
mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
|
||||
}
|
||||
|
||||
protected synchronized void emitSafeRecords() {
|
||||
emitRecords(false);
|
||||
}
|
||||
|
||||
protected void noteCurrentRecord(VariantContext vc) {
|
||||
// did the user break the contract by giving a record too late?
|
||||
if (mostUpstreamWritableLoc != null && vc.getStart() < mostUpstreamWritableLoc) // went too far back, since may have already written anything that is <= mostUpstreamWritableLoc
|
||||
throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getChr() + ":" + vc.getStart() + " was just added.");
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// private implementation functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private synchronized void emitRecords(boolean emitUnsafe) {
|
||||
while (!queue.isEmpty()) {
|
||||
VCFRecord firstRec = queue.peek();
|
||||
|
||||
|
|
@ -140,15 +179,6 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a string representation of this object.
|
||||
* @return a string representation of this object
|
||||
*/
|
||||
@Override
|
||||
public String toString() {
|
||||
return getClass().getName();
|
||||
}
|
||||
|
||||
private static class VariantContextComparator implements Comparator<VCFRecord> {
|
||||
public int compare(VCFRecord r1, VCFRecord r2) {
|
||||
return r1.vc.getStart() - r2.vc.getStart();
|
||||
|
|
|
|||
|
|
@ -186,17 +186,21 @@ public class ReadUtils {
|
|||
* @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another contig.
|
||||
*/
|
||||
public static Integer getAdaptorBoundary(final SAMRecord read) {
|
||||
final int MAXIMUM_ADAPTOR_LENGTH = 8;
|
||||
final int insertSize = Math.abs(read.getInferredInsertSize()); // the inferred insert size can be negative if the mate is mapped before the read (so we take the absolute value)
|
||||
|
||||
if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another
|
||||
return null; // chromosome or unmapped pairs
|
||||
|
||||
int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read)
|
||||
if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs
|
||||
return null;
|
||||
|
||||
Integer adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read)
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header)
|
||||
else
|
||||
adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header)
|
||||
|
||||
if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) )
|
||||
adaptorBoundary = null; // we are being conservative by not allowing the adaptor boundary to go beyond what we belive is the maximum size of an adaptor
|
||||
|
||||
return adaptorBoundary;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,20 +1,19 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialReadsTraversal;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -44,11 +43,11 @@ import org.testng.annotations.Test;
|
|||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class PrintReadsWalkerUnitTest
|
||||
* Class PrintReadsUnitTest
|
||||
* <p/>
|
||||
* This tests the print reads walker, using the artificial reads traversal
|
||||
*/
|
||||
public class PrintReadsWalkerUnitTest extends BaseTest {
|
||||
public class PrintReadsUnitTest extends BaseTest {
|
||||
|
||||
/**
|
||||
* our private fake reads traversal. This traversal seeds the
|
||||
|
|
@ -60,19 +59,23 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
|
|||
private ReferenceContext bases = null;
|
||||
//private ReferenceContext ref = new ReferenceContext()
|
||||
|
||||
PrintReadsWalker walker;
|
||||
ArtificialSAMFileWriter writer;
|
||||
|
||||
@BeforeMethod
|
||||
public void before() {
|
||||
trav = new ArtificialReadsTraversal();
|
||||
readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads;
|
||||
|
||||
walker = new PrintReadsWalker();
|
||||
writer = new ArtificialSAMFileWriter();
|
||||
walker.out = writer;
|
||||
walker.initialize();
|
||||
}
|
||||
|
||||
/** test that we get out the same number of reads we put in */
|
||||
@Test
|
||||
public void testReadCount() {
|
||||
PrintReadsWalker walker = new PrintReadsWalker();
|
||||
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
|
||||
walker.out = writer;
|
||||
|
||||
trav.traverse(walker, null, writer);
|
||||
assertEquals(writer.getRecords().size(), readTotal);
|
||||
}
|
||||
|
|
@ -80,10 +83,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
|
|||
/** test that we're ok with a null read */
|
||||
@Test
|
||||
public void testNullRead() {
|
||||
PrintReadsWalker walker = new PrintReadsWalker();
|
||||
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
|
||||
walker.out = writer;
|
||||
|
||||
SAMRecord rec = walker.map(bases, null, null);
|
||||
assertTrue(rec == null);
|
||||
}
|
||||
|
|
@ -91,10 +90,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
|
|||
/** tes that we get the read we put into the map function */
|
||||
@Test
|
||||
public void testReturnRead() {
|
||||
PrintReadsWalker walker = new PrintReadsWalker();
|
||||
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
|
||||
walker.out = writer;
|
||||
|
||||
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
|
||||
GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
|
||||
SAMRecord ret = walker.map(bases, rec, null);
|
||||
|
|
@ -102,20 +97,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
|
|||
assertTrue(ret.getReadName().equals(rec.getReadName()));
|
||||
}
|
||||
|
||||
/** test that the read makes it to the output source */
|
||||
@Test
|
||||
public void testReducingRead() {
|
||||
PrintReadsWalker walker = new PrintReadsWalker();
|
||||
ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
|
||||
walker.out = writer;
|
||||
|
||||
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
|
||||
SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
|
||||
SAMRecord ret = walker.map(bases, null,null);
|
||||
walker.reduce(ret,writer);
|
||||
|
||||
assertTrue(writer.getRecords().size() == 1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testHasAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("e70eb5f80c93e366dcbe3cf684c154e4"));
|
||||
Arrays.asList("604328867fc9aaf3e71fa0f9ca2ba5c9"));
|
||||
executeTest("test file has annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testNoAnnotsAsking1() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("1e52761fdff73a5361b5eb0a6e5d9dad"));
|
||||
Arrays.asList("bbde8c92d27ad2a7ec1ff2d095d459eb"));
|
||||
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
public void testExcludeAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
|
||||
Arrays.asList("bb4eebfaffc230cb8a31e62e7b53a300"));
|
||||
Arrays.asList("8ec9f79cab84f26d8250f00d99d18aac"));
|
||||
executeTest("test exclude annotations", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
|
|||
public void testCallableLociWalker3() {
|
||||
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
|
||||
Arrays.asList("4496551d4493857e5153d8172965e527", "b0667e31af9aec02eaf73ca73ec16937"));
|
||||
Arrays.asList("b7d26a470ef906590249f2fa45fd6bdd", "da431d393f7c2b2b3e27556b86c1dbc7"));
|
||||
executeTest("formatBed lots of arguments", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,25 +55,25 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
|
|||
spec.setOutputFileLocation(baseOutputFile);
|
||||
|
||||
// now add the expected files that get generated
|
||||
spec.addAuxFile("2f072fd8b41b5ac1108797f89376c797", baseOutputFile);
|
||||
spec.addAuxFile("d17ac7cc0b58ba801d2b0727a363d615", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("c05190c9e6239cdb1cd486edcbc23505", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("0f9603eb1ca4a26828e82d8c8f4991f6", baseOutputFile);
|
||||
spec.addAuxFile("51e6c09a307654f43811af35238fb179", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("229b9b5bc2141c86dbc69c8acc9eba6a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics"));
|
||||
spec.addAuxFile("c94a52b4e73a7995319e0b570c80d2f7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
|
||||
spec.addAuxFile("1970a44efb7ace4e51a37f0bd2dc84d1", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
|
||||
spec.addAuxFile("c321c542be25359d2e26d45cbeb6d7ab", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
|
||||
spec.addAuxFile("9023cc8939777d515cd2895919a99688", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("3597b69e90742c5dd7c83fbc74d079f3", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("e69ee59f447816c025c09a56e321cef8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
|
||||
spec.addAuxFile("fa054b665d1ae537ada719da7713e11b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
|
||||
spec.addAuxFile("28dec9383b3a323a5ce7d96d62712917", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
|
||||
spec.addAuxFile("a836b92ac17b8ff9788e2aaa9116b5d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
|
||||
spec.addAuxFile("1a6ea3aa759fb154ccc4e171ebca9d02", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
|
||||
spec.addAuxFile("b492644ff06b4ffb044d5075cd168abf", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
|
||||
spec.addAuxFile("77cef87dc4083a7b60b7a7b38b4c0bd8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
|
||||
spec.addAuxFile("8e1adbe37b98bb2271ba13932d5c947f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("761d2f9daf2ebaf43abf65c8fd2fcd05", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("4656c8797696cf5ef0cdc5971271236a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
|
||||
spec.addAuxFile("6f1d7f2120a4ac524c6026498f45295a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
|
||||
spec.addAuxFile("69c424bca013159942337b67fdf31ff8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
|
||||
spec.addAuxFile("6909d50a7da337cd294828b32b945eb8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
|
||||
spec.addAuxFile("a395dafde101971d2b9e5ddb6cd4b7d0", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
|
||||
spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
|
||||
spec.addAuxFile("0582b4681dbc02ece2dfe2752dcfd228", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
|
||||
spec.addAuxFile("0685214965bf1863f7ce8de2e38af060", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
||||
spec.addAuxFile("7a0cd8a5ebaaa82621fd3b5aed9c32fe", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
|
||||
spec.addAuxFile("185b910e499c08a8b88dd3ed1ac9e8ec", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
|
||||
spec.addAuxFile("d5d11b686689467b5a8836f0a07f447d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
|
||||
spec.addAuxFile("ad1a2775a31b1634daf64e691676bb96", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
|
||||
|
||||
execute("testBaseOutputNoFiltering",spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("66ed60c6c1190754abd8a0a9d1d8d61e"));
|
||||
Arrays.asList("d61c7055bd09024abb8902bde6bd3960"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -189,7 +189,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("2b2729414ae855d390e7940956745bce"));
|
||||
Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -208,7 +208,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("95c6120efb92e5a325a5cec7d77c2dab"));
|
||||
Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -265,7 +265,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("fa4f3ee67d98b64102a8a3ec81a3bc81"));
|
||||
Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("df90890e43d735573a3b3e4f289ca46b"));
|
||||
Arrays.asList("36ce53ae4319718ad9c8ae391deebc8c"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||
Arrays.asList("cff6dd0f4eb1ef0b6fc476da6ffead19"));
|
||||
Arrays.asList("d356cbaf240d7025d1aecdabaff3a3e0"));
|
||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||
}
|
||||
|
||||
|
|
@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||
Arrays.asList("1e2a4aab26e9ab0dae709d33a669e036"));
|
||||
Arrays.asList("fcd590a55f5fec2a9b7e628187d6b8a8"));
|
||||
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -34,8 +34,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
|
||||
new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "ab4940a16ab990181bd8368c76b23853" );
|
||||
new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941");
|
||||
new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "36c0c467b6245c2c6c4e9c956443a154" );
|
||||
new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "955a8fa2ddb2b04c406766ccd9ac45cc" );
|
||||
new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "714e65d6cb51ae32221a77ce84cbbcdc" );
|
||||
new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "932f0063abb2a23c22ec992ef8d36aa5" );
|
||||
return CCTest.getTests(CCTest.class);
|
||||
}
|
||||
|
||||
|
|
@ -91,8 +91,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
public Object[][] createTRTestData() {
|
||||
new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0b7123ae9f4155484b68e4a4f96c5504" );
|
||||
new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "d04cf1f6df486e45226ebfbf93a188a5");
|
||||
new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b2f4757bc47cf23bd9a09f756c250787" );
|
||||
new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "502c7df4d4923c4d078b014bf78bed34" );
|
||||
new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "74314e5562c1a65547bb0edaacffe602" );
|
||||
new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "41c2f82f7789421f3690ed3c35b8f2e4" );
|
||||
|
||||
return TRTest.getTests(TRTest.class);
|
||||
}
|
||||
|
|
@ -291,7 +291,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCountCovariatesNoIndex() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "828d247c6e8ef5ebdf3603dc0ce79f61" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "aac7df368ca589dc0a66d5bd9ad007e3" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
@ -317,7 +317,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
@Test(dependsOnMethods = "testCountCovariatesNoIndex")
|
||||
public void testTableRecalibratorNoIndex() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "991f093a0e610df235d28ada418ebf33" );
|
||||
e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "02249d9933481052df75c58a2a1a8e63" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
String bam = entry.getKey();
|
||||
|
|
|
|||
|
|
@ -450,4 +450,21 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
);
|
||||
executeTest("testIntervalStrat", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testModernVCFWithLargeIndels() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf",
|
||||
"-L 20",
|
||||
"-D " + b37dbSNP132,
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
|
||||
);
|
||||
executeTest("testModernVCFWithLargeIndels", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,6 +83,28 @@ public class IntervalIntegrationTest extends WalkerTest {
|
|||
executeTest("testUnmappedReadInclusion",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMixedMappedAndUnmapped() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T PrintReads" +
|
||||
" -I " + validationDataLocation + "MV1994.bam" +
|
||||
" -R " + validationDataLocation + "Escherichia_coli_K12_MG1655.fasta" +
|
||||
" -L Escherichia_coli_K12:4630000-4639675" +
|
||||
" -L unmapped" +
|
||||
" -U",
|
||||
0, // two output files
|
||||
Collections.<String>emptyList());
|
||||
|
||||
// our base file
|
||||
File baseOutputFile = createTempFile("testUnmappedReadInclusion",".bam");
|
||||
spec.setOutputFileLocation(baseOutputFile);
|
||||
spec.addAuxFile("083ef1e9ded868e0d12c05a1354c0319",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
|
||||
spec.addAuxFile("fa90ff91ac0cc689c71a3460a3530b8b", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
|
||||
|
||||
executeTest("testUnmappedReadInclusion",spec);
|
||||
}
|
||||
|
||||
|
||||
@Test(enabled = false)
|
||||
public void testUnmappedReadExclusion() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
|
|||
|
|
@ -103,10 +103,31 @@ public class ReadUtilsUnitTest extends BaseTest {
|
|||
read.setReadNegativeStrandFlag(false);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
Assert.assertNull(boundary);
|
||||
read.setInferredInsertSize(10);
|
||||
|
||||
// Test case 6: read is unmapped
|
||||
read.setReadUnmappedFlag(true);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
Assert.assertNull(boundary);
|
||||
read.setReadUnmappedFlag(false);
|
||||
|
||||
// Test case 7: reads don't overlap and look like this:
|
||||
// <--------|
|
||||
// |------>
|
||||
// first read:
|
||||
myStart = 980;
|
||||
read.setAlignmentStart(myStart);
|
||||
read.setInferredInsertSize(20);
|
||||
read.setReadNegativeStrandFlag(true);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
Assert.assertNull(boundary);
|
||||
|
||||
// second read:
|
||||
myStart = 1000;
|
||||
read.setAlignmentStart(myStart);
|
||||
read.setMateAlignmentStart(980);
|
||||
read.setReadNegativeStrandFlag(false);
|
||||
boundary = ReadUtils.getAdaptorBoundary(read);
|
||||
Assert.assertNull(boundary);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,14 +29,14 @@ class DataProcessingPipeline extends QScript {
|
|||
var reference: File = _
|
||||
|
||||
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
|
||||
var dbSNP: List[File] = List()
|
||||
var dbSNP: Seq[File] = Seq()
|
||||
|
||||
/****************************************************************************
|
||||
* Optional Parameters
|
||||
****************************************************************************/
|
||||
|
||||
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
|
||||
var indels: List[File] = List()
|
||||
var indels: Seq[File] = Seq()
|
||||
|
||||
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
|
||||
var bwaPath: File = _
|
||||
|
|
@ -118,13 +118,13 @@ class DataProcessingPipeline extends QScript {
|
|||
// Because the realignment only happens after these scripts are executed, in case you are using
|
||||
// bwa realignment, this function will operate over the original bam files and output over the
|
||||
// (to be realigned) bam files.
|
||||
def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, List[File]] = {
|
||||
def createSampleFiles(bamFiles: Seq[File], realignedBamFiles: Seq[File]): Map[String, Seq[File]] = {
|
||||
|
||||
// Creating a table with SAMPLE information from each input BAM file
|
||||
val sampleTable = scala.collection.mutable.Map.empty[String, List[File]]
|
||||
val sampleTable = scala.collection.mutable.Map.empty[String, Seq[File]]
|
||||
val realignedIterator = realignedBamFiles.iterator
|
||||
for (bam <- bamFiles) {
|
||||
val rBam = realignedIterator.next // advance to next element in the realignedBam list so they're in sync.
|
||||
val rBam = realignedIterator.next() // advance to next element in the realignedBam list so they're in sync.
|
||||
|
||||
val samReader = new SAMFileReader(bam)
|
||||
val header = samReader.getFileHeader
|
||||
|
|
@ -138,12 +138,12 @@ class DataProcessingPipeline extends QScript {
|
|||
for (rg <- readGroups) {
|
||||
val sample = rg.getSample
|
||||
if (!sampleTable.contains(sample))
|
||||
sampleTable(sample) = List(rBam)
|
||||
sampleTable(sample) = Seq(rBam)
|
||||
else if ( !sampleTable(sample).contains(rBam))
|
||||
sampleTable(sample) :+= rBam
|
||||
}
|
||||
}
|
||||
return sampleTable.toMap
|
||||
sampleTable.toMap
|
||||
}
|
||||
|
||||
// Rebuilds the Read Group string to give BWA
|
||||
|
|
@ -161,8 +161,8 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
// Takes a list of processed BAM files and realign them using the BWA option requested (bwase or bwape).
|
||||
// Returns a list of realigned BAM files.
|
||||
def performAlignment(bams: List[File]): List[File] = {
|
||||
var realignedBams: List[File] = List()
|
||||
def performAlignment(bams: Seq[File]): Seq[File] = {
|
||||
var realignedBams: Seq[File] = Seq()
|
||||
var index = 1
|
||||
for (bam <- bams) {
|
||||
// first revert the BAM file to the original qualities
|
||||
|
|
@ -194,10 +194,10 @@ class DataProcessingPipeline extends QScript {
|
|||
realignedBams :+= rgRealignedBamFile
|
||||
index = index + 1
|
||||
}
|
||||
return realignedBams
|
||||
realignedBams
|
||||
}
|
||||
|
||||
def getIndelCleaningModel(): ConsensusDeterminationModel = {
|
||||
def getIndelCleaningModel: ConsensusDeterminationModel = {
|
||||
if (cleaningModel == "KNOWNS_ONLY")
|
||||
ConsensusDeterminationModel.KNOWNS_ONLY
|
||||
else if (cleaningModel == "USE_SW")
|
||||
|
|
@ -206,17 +206,17 @@ class DataProcessingPipeline extends QScript {
|
|||
ConsensusDeterminationModel.USE_READS
|
||||
}
|
||||
|
||||
def revertBams(bams: List[File], removeAlignmentInformation: Boolean): List[File] = {
|
||||
var revertedBAMList: List[File] = List()
|
||||
def revertBams(bams: Seq[File], removeAlignmentInformation: Boolean): Seq[File] = {
|
||||
var revertedBAMList: Seq[File] = Seq()
|
||||
for (bam <- bams)
|
||||
revertedBAMList :+= revertBAM(bam, removeAlignmentInformation)
|
||||
return revertedBAMList
|
||||
revertedBAMList
|
||||
}
|
||||
|
||||
def revertBAM(bam: File, removeAlignmentInformation: Boolean): File = {
|
||||
val revertedBAM = swapExt(bam, ".bam", ".reverted.bam")
|
||||
add(revert(bam, revertedBAM, removeAlignmentInformation))
|
||||
return revertedBAM
|
||||
revertedBAM
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
|
|
@ -224,22 +224,22 @@ class DataProcessingPipeline extends QScript {
|
|||
****************************************************************************/
|
||||
|
||||
|
||||
def script = {
|
||||
def script() {
|
||||
// final output list of processed bam files
|
||||
var cohortList: List[File] = List()
|
||||
var cohortList: Seq[File] = Seq()
|
||||
|
||||
// sets the model for the Indel Realigner
|
||||
cleanModelEnum = getIndelCleaningModel()
|
||||
cleanModelEnum = getIndelCleaningModel
|
||||
|
||||
// keep a record of the number of contigs in the first bam file in the list
|
||||
val bams = QScriptUtils.createListFromFile(input)
|
||||
val bams = QScriptUtils.createSeqFromFile(input)
|
||||
if (nContigs < 0)
|
||||
nContigs = QScriptUtils.getNumberOfContigs(bams(0))
|
||||
|
||||
val realignedBAMs = if (useBWApe || useBWAse || useBWAsw) {performAlignment(bams)} else {revertBams(bams, false)}
|
||||
|
||||
// generate a BAM file per sample joining all per lane files if necessary
|
||||
val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs)
|
||||
val sampleBAMFiles: Map[String, Seq[File]] = createSampleFiles(bams, realignedBAMs)
|
||||
|
||||
// if this is a 'knowns only' indel realignment run, do it only once for all samples.
|
||||
val globalIntervals = new File(outputDir + projectName + ".intervals")
|
||||
|
|
@ -317,7 +317,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.maxRecordsInRam = 100000
|
||||
}
|
||||
|
||||
case class target (inBams: List[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
||||
case class target (inBams: Seq[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
||||
if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||
this.input_file = inBams
|
||||
this.out = outIntervals
|
||||
|
|
@ -330,7 +330,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outIntervals + ".target"
|
||||
}
|
||||
|
||||
case class clean (inBams: List[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
|
||||
case class clean (inBams: Seq[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
|
||||
this.input_file = inBams
|
||||
this.targetIntervals = tIntervals
|
||||
this.out = outBam
|
||||
|
|
@ -347,11 +347,11 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
|
||||
this.knownSites ++= qscript.dbSNP
|
||||
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||
this.covariate ++= Seq("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = outRecalFile
|
||||
if (!defaultPlatform.isEmpty) this.default_platform = defaultPlatform
|
||||
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
|
||||
if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
|
||||
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
|
||||
this.scatterCount = nContigs
|
||||
this.analysisName = queueLogDir + outRecalFile + ".covariates"
|
||||
|
|
@ -363,7 +363,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.recal_file = inRecalFile
|
||||
this.baq = CalculationMode.CALCULATE_AS_NECESSARY
|
||||
this.out = outBam
|
||||
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
|
||||
if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
|
||||
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
|
||||
this.no_pg_tag = qscript.testMode
|
||||
this.scatterCount = nContigs
|
||||
|
|
@ -395,7 +395,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outBam + ".dedup"
|
||||
}
|
||||
|
||||
case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
|
||||
case class joinBams (inBams: Seq[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
|
||||
this.input = inBams
|
||||
this.output = outBam
|
||||
this.analysisName = queueLogDir + outBam + ".joinBams"
|
||||
|
|
@ -495,7 +495,7 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outBam + ".bwasw"
|
||||
}
|
||||
|
||||
case class writeList(inBams: List[File], outBamList: File) extends ListWriterFunction {
|
||||
case class writeList(inBams: Seq[File], outBamList: File) extends ListWriterFunction {
|
||||
this.inputFiles = inBams
|
||||
this.listFile = outBamList
|
||||
this.analysisName = queueLogDir + outBamList + ".bamList"
|
||||
|
|
|
|||
|
|
@ -46,20 +46,24 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
val bamList: File,
|
||||
val goldStandard_VCF: File,
|
||||
val intervals: String,
|
||||
val titvTarget: Double,
|
||||
val trancheTarget: Double,
|
||||
val indelTranchTarget: Double,
|
||||
val snpTrancheTarget: Double,
|
||||
val isLowpass: Boolean,
|
||||
val isExome: Boolean,
|
||||
val nSamples: Int) {
|
||||
val name = qscript.outputDir + baseName
|
||||
val clusterFile = new File(name + ".clusters")
|
||||
val rawVCF = new File(name + ".raw.vcf")
|
||||
val rawSnpVCF = new File(name + ".raw.vcf")
|
||||
val rawIndelVCF = new File(name + ".raw.indel.vcf")
|
||||
val filteredIndelVCF = new File(name + ".filtered.indel.vcf")
|
||||
val recalibratedVCF = new File(name + ".recalibrated.vcf")
|
||||
val tranchesFile = new File(name + ".tranches")
|
||||
val vqsrRscript = name + ".vqsr.r"
|
||||
val recalFile = new File(name + ".tranches.recal")
|
||||
val recalibratedSnpVCF = new File(name + ".snp.recalibrated.vcf")
|
||||
val recalibratedIndelVCF = new File(name + ".indel.recalibrated.vcf")
|
||||
val tranchesSnpFile = new File(name + ".snp.tranches")
|
||||
val tranchesIndelFile = new File(name + ".indel.tranches")
|
||||
val vqsrSnpRscript = name + ".snp.vqsr.r"
|
||||
val vqsrIndelRscript = name + ".indel.vqsr.r"
|
||||
val recalSnpFile = new File(name + ".snp.tranches.recal")
|
||||
val recalIndelFile = new File(name + ".indel.tranches.recal")
|
||||
val goldStandardRecalibratedVCF = new File(name + "goldStandard.recalibrated.vcf")
|
||||
val goldStandardTranchesFile = new File(name + "goldStandard.tranches")
|
||||
val goldStandardRecalFile = new File(name + "goldStandard.tranches.recal")
|
||||
|
|
@ -88,6 +92,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
val training_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.highQuality.vcf"
|
||||
val badSites_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.terrible.vcf"
|
||||
val projectConsensus_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/ALL.wgs.projectConsensus_v2b.20101123.snps.sites.vcf"
|
||||
val indelGoldStandardCallset = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf"
|
||||
|
||||
val lowPass: Boolean = true
|
||||
val exome: Boolean = true
|
||||
|
|
@ -101,69 +106,69 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
"NA12878_gold" -> new Target("NA12878.goldStandard", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/data/goldStandard.list"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** There is no gold standard for the gold standard **
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, lowPass, !exome, 391),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, lowPass, !exome, 391),
|
||||
"NA12878_wgs_b37" -> new Target("NA12878.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
|
||||
"NA12878_wgs_decoy" -> new Target("NA12878.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.NA12878.clean.dedup.recal.bam"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
|
||||
"NA12878_wgs_hg18" -> new Target("NA12878.HiSeq.WGS.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
|
||||
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask",
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam"),
|
||||
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf"),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 90.0, 99.0, !lowPass, !exome, 1),
|
||||
"NA12878_wex_b37" -> new Target("NA12878.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/seq/picard_aggregation/C339/NA12878/v3/NA12878.bam"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
|
||||
"NA12878_wex_hg18" -> new Target("NA12878.HiSeq.WEx.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
|
||||
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.indels.10.mask",
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.WEx.cleaned.recal.bam"),
|
||||
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.vcf"),
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
|
||||
"NA12878_wex_decoy" -> new Target("NA12878.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.NA12878.clean.dedup.recal.bam"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
|
||||
"CEUTrio_wex_b37" -> new Target("CEUTrio.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.bwa.cleaned.recal.bam"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"),
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 3),
|
||||
"CEUTrio_wgs_b37" -> new Target("CEUTrio.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 3),
|
||||
"CEUTrio_wex_decoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 3),
|
||||
"CEUTrio_wgs_decoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 3),
|
||||
"GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"),
|
||||
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.GA2.hg19.filtered.vcf"),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
|
||||
"FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"),
|
||||
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, 99.0, lowPass, !exome, 79),
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 90.0, 99.0, lowPass, !exome, 79),
|
||||
"TGPWExGdA" -> new Target("1000G.WEx.GdA", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/1kg/processing/pipeline_test_bams/Barcoded_1000G_WEx_Reduced_Plate_1.cleaned.list"), // BUGBUG: reduce from 60 to 20 people
|
||||
new File("/humgen/gsa-scr1/delangel/NewUG/calls/AugustRelease.filtered_Q50_QD5.0_SB0.0.allSamples.SNPs_hg19.WEx_UG_newUG_MQC.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 2.6, 99.0, !lowPass, exome, 96),
|
||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 99.0, !lowPass, exome, 96),
|
||||
"LowPassN60" -> new Target("lowpass.N60", b36, dbSNP_b36, hapmap_b36, indelMask_b36,
|
||||
new File("/humgen/1kg/analysis/bamsForDataProcessingPapers/lowpass_b36/lowpass.chr20.cleaned.matefixed.bam"), // the bam list to call from
|
||||
new File("/home/radon01/depristo/work/oneOffProjects/VQSRCutByNRS/lowpass.N60.chr20.filtered.vcf"), // the gold standard VCF file to run through the VQSR
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.b36.intervals", 2.3, 99.0, lowPass, !exome, 60), // chunked interval list to use with Queue's scatter/gather functionality
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.b36.intervals", 90.0, 99.0, lowPass, !exome, 60), // chunked interval list to use with Queue's scatter/gather functionality
|
||||
"LowPassEUR363Nov" -> new Target("EUR.nov2010", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||
new File("/humgen/1kg/processing/pipeline_test_bams/EUR.363sample.Nov2010.chr20.bam"),
|
||||
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, 99.0, lowPass, !exome, 363)
|
||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 90.0, 99.0, lowPass, !exome, 363)
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -181,15 +186,15 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
val goldStandard = true
|
||||
for (target <- targets) {
|
||||
if( !skipCalling ) {
|
||||
if (!noIndels) add(new indelCall(target), new indelFilter(target), new indelEvaluation(target))
|
||||
if (!noIndels) add(new indelCall(target), new indelRecal(target), new indelCut(target), new indelEvaluation(target))
|
||||
add(new snpCall(target))
|
||||
add(new VQSR(target, !goldStandard))
|
||||
add(new applyVQSR(target, !goldStandard))
|
||||
add(new snpRecal(target, !goldStandard))
|
||||
add(new snpCut(target, !goldStandard))
|
||||
add(new snpEvaluation(target))
|
||||
}
|
||||
if ( runGoldStandard ) {
|
||||
add(new VQSR(target, goldStandard))
|
||||
add(new applyVQSR(target, goldStandard))
|
||||
add(new snpRecal(target, goldStandard))
|
||||
add(new snpCut(target, goldStandard))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -222,7 +227,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
this.min_base_quality_score = minimumBaseQuality
|
||||
if (qscript.deletions >= 0)
|
||||
this.max_deletion_fraction = qscript.deletions
|
||||
this.out = t.rawVCF
|
||||
this.out = t.rawSnpVCF
|
||||
this.glm = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
|
||||
this.baq = if (noBAQ || t.isExome) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY}
|
||||
this.analysisName = t.name + "_UGs"
|
||||
|
|
@ -257,79 +262,115 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + t.name + ".indelfilter"
|
||||
}
|
||||
|
||||
// 3.) Variant Quality Score Recalibration - Generate Recalibration table
|
||||
class VQSR(t: Target, goldStandard: Boolean) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
|
||||
class VQSRBase(t: Target) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
|
||||
this.nt = 2
|
||||
this.reference_sequence = t.reference
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
|
||||
this.resource :+= new TaggedFile( t.hapmapFile, "training=true,truth=true,prior=15.0" )
|
||||
this.resource :+= new TaggedFile( omni_b37, "training=true,truth=true,prior=12.0" )
|
||||
this.resource :+= new TaggedFile( training_1000G, "training=true,prior=10.0" )
|
||||
this.allPoly = true
|
||||
this.tranche ++= List("100.0", "99.9", "99.5", "99.3", "99.0", "98.9", "98.8", "98.5", "98.4", "98.3", "98.2", "98.1", "98.0", "97.9", "97.8", "97.5", "97.0", "95.0", "90.0")
|
||||
}
|
||||
|
||||
class snpRecal(t: Target, goldStandard: Boolean) extends VQSRBase(t) with UNIVERSAL_GATK_ARGS {
|
||||
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawSnpVCF } )
|
||||
this.resource :+= new TaggedFile( t.hapmapFile, "known=false,training=true,truth=true,prior=15.0" )
|
||||
this.resource :+= new TaggedFile( omni_b37, "known=false,training=true,truth=true,prior=12.0" )
|
||||
this.resource :+= new TaggedFile( training_1000G, "known=false,training=true,prior=10.0" )
|
||||
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
|
||||
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
|
||||
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
|
||||
if(t.nSamples >= 10) { // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
|
||||
this.use_annotation ++= List("InbreedingCoeff")
|
||||
}
|
||||
if(!t.isExome) {
|
||||
if(t.nSamples >= 10)
|
||||
this.use_annotation ++= List("InbreedingCoeff") // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
|
||||
if(!t.isExome)
|
||||
this.use_annotation ++= List("DP")
|
||||
} else { // exome specific parameters
|
||||
else { // exome specific parameters
|
||||
this.resource :+= new TaggedFile( badSites_1000G, "bad=true,prior=2.0" )
|
||||
this.mG = 6
|
||||
if(t.nSamples <= 3) { // very few exome samples means very few variants
|
||||
if(t.nSamples <= 3) { // very few exome samples means very few variants
|
||||
this.mG = 4
|
||||
this.percentBad = 0.04
|
||||
}
|
||||
}
|
||||
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile }
|
||||
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
|
||||
this.allPoly = true
|
||||
this.tranche ++= List("100.0", "99.9", "99.5", "99.3", "99.0", "98.9", "98.8", "98.5", "98.4", "98.3", "98.2", "98.1", "98.0", "97.9", "97.8", "97.5", "97.0", "95.0", "90.0")
|
||||
this.rscript_file = t.vqsrRscript
|
||||
this.analysisName = t.name + "_VQSR"
|
||||
this.jobName = queueLogDir + t.name + ".VQSR"
|
||||
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesSnpFile }
|
||||
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalSnpFile }
|
||||
this.rscript_file = t.vqsrSnpRscript
|
||||
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
|
||||
this.analysisName = t.name + "_VQSRs"
|
||||
this.jobName = queueLogDir + t.name + ".snprecal"
|
||||
}
|
||||
|
||||
class indelRecal(t: Target) extends VQSRBase(t) with UNIVERSAL_GATK_ARGS {
|
||||
this.input :+= t.rawIndelVCF
|
||||
this.resource :+= new TaggedFile(indelGoldStandardCallset, "known=false,training=true,truth=true,prior=12.0" )
|
||||
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
|
||||
this.use_annotation ++= List("QD", "HaplotypeScore", "ReadPosRankSum", "FS")
|
||||
if(t.nSamples >= 10)
|
||||
this.use_annotation ++= List("InbreedingCoeff") // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
|
||||
this.tranches_file = t.tranchesIndelFile
|
||||
this.recal_file = t.recalIndelFile
|
||||
this.rscript_file = t.vqsrIndelRscript
|
||||
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
|
||||
this.analysisName = t.name + "_VQSRi"
|
||||
this.jobName = queueLogDir + t.name + ".indelrecal"
|
||||
}
|
||||
|
||||
|
||||
// 4.) Apply the recalibration table to the appropriate tranches
|
||||
class applyVQSR (t: Target, goldStandard: Boolean) extends ApplyRecalibration with UNIVERSAL_GATK_ARGS {
|
||||
class applyVQSRBase (t: Target) extends ApplyRecalibration with UNIVERSAL_GATK_ARGS {
|
||||
this.memoryLimit = 6
|
||||
this.reference_sequence = t.reference
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
|
||||
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile}
|
||||
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
|
||||
this.ts_filter_level = t.trancheTarget
|
||||
this.out = t.recalibratedVCF
|
||||
this.analysisName = t.name + "_AVQSR"
|
||||
this.jobName = queueLogDir + t.name + ".applyVQSR"
|
||||
}
|
||||
|
||||
class snpCut (t: Target, goldStandard: Boolean) extends applyVQSRBase(t) {
|
||||
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawSnpVCF } )
|
||||
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesSnpFile}
|
||||
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalSnpFile }
|
||||
this.ts_filter_level = t.snpTrancheTarget
|
||||
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
|
||||
this.out = t.recalibratedSnpVCF
|
||||
this.analysisName = t.name + "_AVQSRs"
|
||||
this.jobName = queueLogDir + t.name + ".snpcut"
|
||||
}
|
||||
|
||||
class indelCut (t: Target) extends applyVQSRBase(t) {
|
||||
this.input :+= t.rawIndelVCF
|
||||
this.tranches_file = t.tranchesIndelFile
|
||||
this.recal_file = t.recalIndelFile
|
||||
this.ts_filter_level = t.indelTranchTarget
|
||||
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
|
||||
this.out = t.recalibratedIndelVCF
|
||||
this.analysisName = t.name + "_AVQSRi"
|
||||
this.jobName = queueLogDir + t.name + ".indelcut"
|
||||
}
|
||||
|
||||
|
||||
// 5.) Variant Evaluation Base(OPTIONAL)
|
||||
class EvalBase(t: Target) extends VariantEval with UNIVERSAL_GATK_ARGS {
|
||||
this.memoryLimit = 3
|
||||
this.reference_sequence = t.reference
|
||||
this.comp :+= new TaggedFile(t.hapmapFile, "hapmap" )
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.D = new File(t.dbsnpFile)
|
||||
this.reference_sequence = t.reference
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.sample = samples
|
||||
}
|
||||
|
||||
// 5a.) SNP Evaluation (OPTIONAL) based on the cut vcf
|
||||
class snpEvaluation(t: Target) extends EvalBase(t) {
|
||||
if (t.reference == b37 || t.reference == hg19) this.comp :+= new TaggedFile( omni_b37, "omni" )
|
||||
this.eval :+= t.recalibratedVCF
|
||||
this.eval :+= t.recalibratedSnpVCF
|
||||
this.out = t.evalFile
|
||||
this.analysisName = t.name + "_VEs"
|
||||
this.jobName = queueLogDir + t.name + ".snp.eval"
|
||||
this.jobName = queueLogDir + t.name + ".snpeval"
|
||||
}
|
||||
|
||||
// 5b.) Indel Evaluation (OPTIONAL)
|
||||
class indelEvaluation(t: Target) extends EvalBase(t) {
|
||||
this.eval :+= t.filteredIndelVCF
|
||||
this.evalModule :+= "IndelStatistics"
|
||||
this.eval :+= t.recalibratedIndelVCF
|
||||
this.comp :+= new TaggedFile(indelGoldStandardCallset, "indelGS" )
|
||||
this.noEV = true
|
||||
this.evalModule = List("CompOverlap", "CountVariants", "TiTvVariantEvaluator", "ValidationReport", "IndelStatistics")
|
||||
this.out = t.evalIndelFile
|
||||
this.analysisName = t.name + "_VEi"
|
||||
this.jobName = queueLogDir + queueLogDir + t.name + ".indel.eval"
|
||||
this.jobName = queueLogDir + queueLogDir + t.name + ".indeleval"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -53,9 +53,9 @@ class PacbioProcessingPipeline extends QScript {
|
|||
|
||||
val queueLogDir: String = ".qlog/"
|
||||
|
||||
def script = {
|
||||
def script() {
|
||||
|
||||
val fileList: List[File] = QScriptUtils.createListFromFile(input)
|
||||
val fileList: Seq[File] = QScriptUtils.createSeqFromFile(input)
|
||||
|
||||
for (file: File <- fileList) {
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.io.IOUtils
|
|||
import org.broadinstitute.sting.utils.help.ApplicationDetails
|
||||
import java.util.{ResourceBundle, Arrays}
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils
|
||||
import org.apache.commons.io.FilenameUtils
|
||||
|
||||
/**
|
||||
* Entry point of Queue. Compiles and runs QScripts passed in to the command line.
|
||||
|
|
@ -61,6 +62,7 @@ object QCommandLine extends Logging {
|
|||
CommandLineProgram.start(qCommandLine, argv)
|
||||
try {
|
||||
Runtime.getRuntime.removeShutdownHook(shutdownHook)
|
||||
qCommandLine.shutdown()
|
||||
} catch {
|
||||
case _ => /* ignore, example 'java.lang.IllegalStateException: Shutdown in progress' */
|
||||
}
|
||||
|
|
@ -78,10 +80,10 @@ object QCommandLine extends Logging {
|
|||
class QCommandLine extends CommandLineProgram with Logging {
|
||||
@Input(fullName="script", shortName="S", doc="QScript scala file", required=true)
|
||||
@ClassType(classOf[File])
|
||||
private var scripts = List.empty[File]
|
||||
var scripts = Seq.empty[File]
|
||||
|
||||
@ArgumentCollection
|
||||
private val settings = new QGraphSettings
|
||||
val settings = new QGraphSettings
|
||||
|
||||
private val qScriptManager = new QScriptManager
|
||||
private val qGraph = new QGraph
|
||||
|
|
@ -91,7 +93,7 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
private lazy val pluginManager = {
|
||||
qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory)
|
||||
qScriptManager.loadScripts(scripts, qScriptClasses)
|
||||
new PluginManager[QScript](classOf[QScript], List(qScriptClasses.toURI.toURL))
|
||||
new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
|
||||
}
|
||||
|
||||
QFunction.parsingEngine = new ParsingEngine(this)
|
||||
|
|
@ -101,12 +103,16 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
* functions, and then builds and runs a QGraph based on the dependencies.
|
||||
*/
|
||||
def execute = {
|
||||
if (settings.qSettings.runName == null)
|
||||
settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
|
||||
|
||||
qGraph.settings = settings
|
||||
|
||||
val allQScripts = pluginManager.createAllTypes();
|
||||
for (script <- allQScripts) {
|
||||
logger.info("Scripting " + pluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
|
||||
loadArgumentsIntoObject(script)
|
||||
script.qSettings = settings.qSettings
|
||||
try {
|
||||
script.script()
|
||||
} catch {
|
||||
|
|
@ -120,22 +126,34 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
// Execute the job graph
|
||||
qGraph.run()
|
||||
|
||||
val functionsAndStatus = qGraph.getFunctionsAndStatus
|
||||
val success = qGraph.success
|
||||
|
||||
// walk over each script, calling onExecutionDone
|
||||
for (script <- allQScripts) {
|
||||
script.onExecutionDone(qGraph.getFunctionsAndStatus(script.functions), qGraph.success)
|
||||
if ( ! settings.disableJobReport ) {
|
||||
val jobStringName = (QScriptUtils.?(settings.jobReportFile)).getOrElse(settings.qSettings.jobNamePrefix + ".jobreport.txt")
|
||||
val scriptFunctions = functionsAndStatus.filterKeys(f => script.functions.contains(f))
|
||||
script.onExecutionDone(scriptFunctions, success)
|
||||
}
|
||||
|
||||
if (!shuttingDown) {
|
||||
val reportFile = new File(jobStringName)
|
||||
logger.info("Writing JobLogging GATKReport to file " + reportFile)
|
||||
QJobReport.printReport(qGraph.getFunctionsAndStatus(script.functions), reportFile)
|
||||
logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", functionsAndStatus.size))
|
||||
|
||||
if ( settings.run ) {
|
||||
val pdfFile = new File(jobStringName + ".pdf")
|
||||
logger.info("Plotting JobLogging GATKReport to file " + pdfFile)
|
||||
QJobReport.plotReport(reportFile, pdfFile)
|
||||
}
|
||||
if (!settings.disableJobReport) {
|
||||
val jobStringName = {
|
||||
if (settings.jobReportFile != null)
|
||||
settings.jobReportFile
|
||||
else
|
||||
settings.qSettings.runName + ".jobreport.txt"
|
||||
}
|
||||
|
||||
if (!shuttingDown) {
|
||||
val reportFile = IOUtils.absolute(settings.qSettings.runDirectory, jobStringName)
|
||||
logger.info("Writing JobLogging GATKReport to file " + reportFile)
|
||||
QJobReport.printReport(functionsAndStatus, reportFile)
|
||||
|
||||
if (settings.run) {
|
||||
val pdfFile = IOUtils.absolute(settings.qSettings.runDirectory, FilenameUtils.removeExtension(jobStringName) + ".pdf")
|
||||
logger.info("Plotting JobLogging GATKReport to file " + pdfFile)
|
||||
QJobReport.plotReport(reportFile, pdfFile)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -179,20 +197,20 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
|
||||
override def getApplicationDetails : ApplicationDetails = {
|
||||
new ApplicationDetails(createQueueHeader(),
|
||||
List.empty[String],
|
||||
Seq.empty[String],
|
||||
ApplicationDetails.createDefaultRunningInstructions(getClass.asInstanceOf[Class[CommandLineProgram]]),
|
||||
"")
|
||||
}
|
||||
|
||||
private def createQueueHeader() : List[String] = {
|
||||
List(String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp),
|
||||
"Copyright (c) 2011 The Broad Institute",
|
||||
private def createQueueHeader() : Seq[String] = {
|
||||
Seq(String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp),
|
||||
"Copyright (c) 2012 The Broad Institute",
|
||||
"Please view our documentation at http://www.broadinstitute.org/gsa/wiki",
|
||||
"For support, please view our support site at http://getsatisfaction.com/gsa")
|
||||
}
|
||||
|
||||
private def getQueueVersion : String = {
|
||||
var stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
|
||||
val stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
|
||||
|
||||
if ( stingResources.containsKey("org.broadinstitute.sting.queue.QueueVersion.version") ) {
|
||||
stingResources.getString("org.broadinstitute.sting.queue.QueueVersion.version")
|
||||
|
|
@ -203,7 +221,7 @@ class QCommandLine extends CommandLineProgram with Logging {
|
|||
}
|
||||
|
||||
private def getBuildTimestamp : String = {
|
||||
var stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
|
||||
val stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
|
||||
|
||||
if ( stingResources.containsKey("build.timestamp") ) {
|
||||
stingResources.getString("build.timestamp")
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.queue
|
|||
import engine.JobRunInfo
|
||||
import org.broadinstitute.sting.queue.function.QFunction
|
||||
import annotation.target.field
|
||||
import io.Source
|
||||
import util.{StringFileConversions, PrimitiveOptionConversions, Logging}
|
||||
|
||||
/**
|
||||
|
|
@ -53,6 +52,11 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
|
|||
type ArgumentCollection = org.broadinstitute.sting.commandline.ArgumentCollection @field
|
||||
type Gather = org.broadinstitute.sting.commandline.Gather @field
|
||||
|
||||
/**
|
||||
* Default settings for QFunctions
|
||||
*/
|
||||
var qSettings: QSettings = _
|
||||
|
||||
/**
|
||||
* Builds the CommandLineFunctions that will be used to run this script and adds them to this.functions directly or using the add() utility method.
|
||||
*/
|
||||
|
|
@ -60,18 +64,14 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
|
|||
|
||||
/**
|
||||
* A default handler for the onExecutionDone() function. By default this doesn't do anything
|
||||
* except print out a fine status message.
|
||||
*/
|
||||
def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) {
|
||||
logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", jobs.size))
|
||||
// this is too much output
|
||||
// for ( (f, info) <- jobs ) logger.info(" %s %s".format(f.jobName, info))
|
||||
}
|
||||
|
||||
/**
|
||||
* The command line functions that will be executed for this QScript.
|
||||
*/
|
||||
var functions = List.empty[QFunction]
|
||||
var functions = Seq.empty[QFunction]
|
||||
|
||||
/**
|
||||
* Exchanges the extension on a file.
|
||||
|
|
@ -98,22 +98,20 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
|
|||
* Adds one or more command line functions to be run.
|
||||
* @param functions Functions to add.
|
||||
*/
|
||||
def add(functions: QFunction*) = {
|
||||
def add(functions: QFunction*) {
|
||||
functions.foreach(function => function.addOrder = QScript.nextAddOrder)
|
||||
this.functions ++= functions
|
||||
}
|
||||
|
||||
def addAll(functions: List[QFunction]) {
|
||||
def addAll(functions: Seq[QFunction]) {
|
||||
functions.foreach( f => add(f) )
|
||||
}
|
||||
|
||||
def extractFileEntries(in: File): List[File] = Source.fromFile(in).getLines().toList
|
||||
}
|
||||
|
||||
object QScript {
|
||||
private var addOrder = 0
|
||||
private def nextAddOrder = {
|
||||
addOrder += 1
|
||||
List(addOrder)
|
||||
Seq(addOrder)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ class QScriptManager() extends Logging {
|
|||
* Compiles and loads the scripts in the files into the current classloader.
|
||||
* Heavily based on scala/src/compiler/scala/tools/ant/Scalac.scala
|
||||
*/
|
||||
def loadScripts(scripts: List[File], tempDir: File) {
|
||||
def loadScripts(scripts: Seq[File], tempDir: File) {
|
||||
if (scripts.size > 0) {
|
||||
val settings = new Settings((error: String) => logger.error(error))
|
||||
settings.deprecation.value = true
|
||||
|
|
@ -36,7 +36,7 @@ class QScriptManager() extends Logging {
|
|||
|
||||
logger.info("Compiling %s QScript%s".format(scripts.size, plural(scripts.size)))
|
||||
logger.debug("Compilation directory: " + settings.outdir.value)
|
||||
run.compileFiles(scripts.map(new PlainFile(_)))
|
||||
run.compileFiles(scripts.toList.map(new PlainFile(_)))
|
||||
|
||||
reporter.printSummary()
|
||||
if (reporter.hasErrors) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -25,15 +25,14 @@
|
|||
package org.broadinstitute.sting.queue
|
||||
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.commandline.{ArgumentCollection, Argument}
|
||||
import org.broadinstitute.sting.queue.util.{SystemUtils, EmailSettings}
|
||||
import org.broadinstitute.sting.commandline.Argument
|
||||
|
||||
/**
|
||||
* Default settings settable on the command line and passed to CommandLineFunctions.
|
||||
*/
|
||||
class QSettings {
|
||||
@Argument(fullName="job_name_prefix", shortName="jobPrefix", doc="Default name prefix for compute farm jobs.", required=false)
|
||||
var jobNamePrefix: String = QSettings.processNamePrefix
|
||||
@Argument(fullName="run_name", shortName="runName", doc="A name for this run used for various status messages.", required=false)
|
||||
var runName: String = _
|
||||
|
||||
@Argument(fullName="job_project", shortName="jobProject", doc="Default project for compute farm jobs.", required=false)
|
||||
var jobProject: String = _
|
||||
|
|
@ -45,13 +44,13 @@ class QSettings {
|
|||
var jobPriority: Option[Int] = None
|
||||
|
||||
@Argument(fullName="job_native_arg", shortName="jobNative", doc="Native arguments to pass to the job runner.", required=false)
|
||||
var jobNativeArgs: List[String] = Nil
|
||||
var jobNativeArgs: Seq[String] = Nil
|
||||
|
||||
@Argument(fullName="job_resource_request", shortName="jobResReq", doc="Resource requests to pass to the job runner.", required=false)
|
||||
var jobResourceRequests: List[String] = Nil
|
||||
var jobResourceRequests: Seq[String] = Nil
|
||||
|
||||
@Argument(fullName="job_environment_name", shortName="jobEnv", doc="Environment names for the job runner.", required=false)
|
||||
var jobEnvironmentNames: List[String] = Nil
|
||||
var jobEnvironmentNames: Seq[String] = Nil
|
||||
|
||||
@Argument(fullName="memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false)
|
||||
var memoryLimit: Option[Double] = None
|
||||
|
|
@ -77,15 +76,4 @@ class QSettings {
|
|||
|
||||
@Argument(fullName="job_scatter_gather_directory", shortName="jobSGDir", doc="Default directory to place scatter gather output for compute farm jobs.", required=false)
|
||||
var jobScatterGatherDirectory: File = _
|
||||
|
||||
@ArgumentCollection
|
||||
val emailSettings = new EmailSettings
|
||||
}
|
||||
|
||||
/**
|
||||
* Default settings settable on the command line and passed to CommandLineFunctions.
|
||||
*/
|
||||
object QSettings {
|
||||
/** A semi-unique job prefix using the host name and the process id. */
|
||||
private val processNamePrefix = "Q-" + SystemUtils.pidAtHost
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.engine
|
||||
|
||||
import org.broadinstitute.sting.queue.function.QFunction
|
||||
|
|
@ -28,15 +52,18 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
|
|||
|
||||
val myRunInfo: JobRunInfo = JobRunInfo.default // purely for dryRun testing
|
||||
|
||||
/**
|
||||
* When using reset status this variable tracks the old status
|
||||
*/
|
||||
var resetFromStatus: RunnerStatus.Value = null
|
||||
|
||||
/**
|
||||
* Initializes with the current status of the function.
|
||||
*/
|
||||
private var currentStatus = {
|
||||
val isDone = function.isDone
|
||||
val isFail = function.isFail
|
||||
if (isFail.isDefined && isFail.get)
|
||||
if (function.isFail)
|
||||
RunnerStatus.FAILED
|
||||
else if (isDone.isDefined && isDone.get)
|
||||
else if (function.isDone)
|
||||
RunnerStatus.DONE
|
||||
else
|
||||
RunnerStatus.PENDING
|
||||
|
|
@ -136,13 +163,15 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
|
|||
* Resets the edge to pending status.
|
||||
*/
|
||||
def resetToPending(cleanOutputs: Boolean) {
|
||||
if (resetFromStatus == null)
|
||||
resetFromStatus = currentStatus
|
||||
currentStatus = RunnerStatus.PENDING
|
||||
if (cleanOutputs)
|
||||
function.deleteOutputs()
|
||||
runner = null
|
||||
}
|
||||
|
||||
override def dotString = function.dotString
|
||||
override def shortDescription = function.shortDescription
|
||||
|
||||
/**
|
||||
* Returns the path to the file to use for logging errors.
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@ package org.broadinstitute.sting.queue.engine
|
|||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import java.util.Date
|
||||
import org.broadinstitute.sting.utils.Utils
|
||||
import org.apache.commons.io.FileUtils
|
||||
import org.apache.commons.io.{IOUtils, FileUtils}
|
||||
import java.io.PrintStream
|
||||
|
||||
/**
|
||||
* Runs a function that executes in process and does not fork out an external process.
|
||||
|
|
@ -16,12 +17,24 @@ class InProcessRunner(val function: InProcessFunction) extends JobRunner[InProce
|
|||
getRunInfo.exechosts = Utils.resolveHostname()
|
||||
runStatus = RunnerStatus.RUNNING
|
||||
|
||||
function.run()
|
||||
function.jobOutputStream = new PrintStream(FileUtils.openOutputStream(function.jobOutputFile))
|
||||
function.jobErrorStream = {
|
||||
if (function.jobErrorFile != null)
|
||||
new PrintStream(FileUtils.openOutputStream(function.jobErrorFile))
|
||||
else
|
||||
function.jobOutputStream
|
||||
}
|
||||
try {
|
||||
function.run()
|
||||
function.jobOutputStream.println("%s%nDone.".format(function.description))
|
||||
} finally {
|
||||
IOUtils.closeQuietly(function.jobOutputStream)
|
||||
if (function.jobErrorFile != null)
|
||||
IOUtils.closeQuietly(function.jobErrorStream)
|
||||
}
|
||||
|
||||
getRunInfo.doneTime = new Date()
|
||||
val content = "%s%nDone.".format(function.description)
|
||||
FileUtils.writeStringToFile(function.jobOutputFile, content)
|
||||
runStatus = RunnerStatus.DONE
|
||||
getRunInfo.doneTime = new Date()
|
||||
}
|
||||
|
||||
def status = runStatus
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.engine
|
||||
|
||||
/**
|
||||
|
|
@ -10,5 +34,5 @@ class MappingEdge(val inputs: QNode, val outputs: QNode) extends QEdge {
|
|||
* @return <map>
|
||||
*/
|
||||
override def toString = "<map>"
|
||||
override def dotString = "<map>"
|
||||
override def shortDescription = ""
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.engine
|
||||
|
||||
/**
|
||||
|
|
@ -15,9 +39,9 @@ trait QEdge {
|
|||
def outputs: QNode
|
||||
|
||||
/**
|
||||
* The function description in .dot files
|
||||
* The short description
|
||||
*/
|
||||
def dotString = ""
|
||||
def shortDescription = ""
|
||||
|
||||
override def hashCode = inputs.hashCode + outputs.hashCode
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -30,7 +30,6 @@ import scala.collection.JavaConversions._
|
|||
import org.jgrapht.alg.CycleDetector
|
||||
import org.jgrapht.EdgeFactory
|
||||
import org.jgrapht.ext.DOTExporter
|
||||
import java.io.File
|
||||
import org.jgrapht.event.{TraversalListenerAdapter, EdgeTraversalEvent}
|
||||
import org.broadinstitute.sting.queue.QException
|
||||
import org.broadinstitute.sting.queue.function.{InProcessFunction, CommandLineFunction, QFunction}
|
||||
|
|
@ -40,7 +39,8 @@ import collection.immutable.{TreeSet, TreeMap}
|
|||
import org.broadinstitute.sting.queue.function.scattergather.{ScatterFunction, CloneFunction, GatherFunction, ScatterGatherableFunction}
|
||||
import java.util.Date
|
||||
import org.broadinstitute.sting.utils.Utils
|
||||
import org.broadinstitute.sting.utils.io.IOUtils
|
||||
import org.apache.commons.io.{FileUtils, IOUtils}
|
||||
import java.io.{OutputStreamWriter, File}
|
||||
|
||||
/**
|
||||
* The internal dependency tracker between sets of function input and output files.
|
||||
|
|
@ -69,7 +69,7 @@ class QGraph extends Logging {
|
|||
private val commandLinePluginManager = new CommandLinePluginManager
|
||||
private var commandLineManager: CommandLineJobManager[CommandLineJobRunner] = _
|
||||
private val inProcessManager = new InProcessJobManager
|
||||
private def managers = List[Any](inProcessManager, commandLineManager)
|
||||
private def managers = Seq[Any](inProcessManager, commandLineManager)
|
||||
|
||||
private class StatusCounts {
|
||||
var pending = 0
|
||||
|
|
@ -88,9 +88,14 @@ class QGraph extends Logging {
|
|||
runningLock.synchronized {
|
||||
if (running) {
|
||||
command.qSettings = settings.qSettings
|
||||
command.freeze
|
||||
val inputs = getQNode(command.inputs.toList.sorted(fileOrdering))
|
||||
val outputs = getQNode(command.outputs.toList.sorted(fileOrdering))
|
||||
command.freeze()
|
||||
val inputFiles = command.inputs
|
||||
var outputFiles = command.outputs
|
||||
outputFiles :+= command.jobOutputFile
|
||||
if (command.jobErrorFile != null)
|
||||
outputFiles :+= command.jobErrorFile
|
||||
val inputs = getQNode(inputFiles.sorted(fileOrdering))
|
||||
val outputs = getQNode(outputFiles.sorted(fileOrdering))
|
||||
addEdge(new FunctionEdge(command, inputs, outputs))
|
||||
}
|
||||
}
|
||||
|
|
@ -106,8 +111,8 @@ class QGraph extends Logging {
|
|||
def run() {
|
||||
runningLock.synchronized {
|
||||
if (running) {
|
||||
IOUtils.checkTempDir(settings.qSettings.tempDirectory)
|
||||
fillGraph
|
||||
org.broadinstitute.sting.utils.io.IOUtils.checkTempDir(settings.qSettings.tempDirectory)
|
||||
fillGraph()
|
||||
val isReady = numMissingValues == 0
|
||||
|
||||
if (this.jobGraph.edgeSet.isEmpty) {
|
||||
|
|
@ -133,11 +138,11 @@ class QGraph extends Logging {
|
|||
}
|
||||
}
|
||||
|
||||
private def fillGraph {
|
||||
private def fillGraph() {
|
||||
logger.info("Generating graph.")
|
||||
fill
|
||||
if (settings.dotFile != null)
|
||||
renderToDot(settings.dotFile)
|
||||
fill()
|
||||
if (settings.graphvizFile != null)
|
||||
renderGraph(settings.graphvizFile)
|
||||
validate()
|
||||
|
||||
if (running && numMissingValues == 0) {
|
||||
|
|
@ -145,7 +150,7 @@ class QGraph extends Logging {
|
|||
if (!scatterGathers.isEmpty) {
|
||||
logger.info("Generating scatter gather jobs.")
|
||||
|
||||
var addedFunctions = List.empty[QFunction]
|
||||
var addedFunctions = Seq.empty[QFunction]
|
||||
for (scatterGather <- scatterGathers) {
|
||||
val functions = scatterGather.asInstanceOf[FunctionEdge]
|
||||
.function.asInstanceOf[ScatterGatherableFunction]
|
||||
|
|
@ -161,10 +166,10 @@ class QGraph extends Logging {
|
|||
addedFunctions.foreach(function => if (running) this.add(function))
|
||||
|
||||
logger.info("Regenerating graph.")
|
||||
fill
|
||||
val scatterGatherDotFile = if (settings.expandedDotFile != null) settings.expandedDotFile else settings.dotFile
|
||||
fill()
|
||||
val scatterGatherDotFile = if (settings.graphvizScatterGatherFile != null) settings.graphvizScatterGatherFile else settings.graphvizFile
|
||||
if (scatterGatherDotFile != null)
|
||||
renderToDot(scatterGatherDotFile)
|
||||
renderGraph(scatterGatherDotFile)
|
||||
validate()
|
||||
}
|
||||
}
|
||||
|
|
@ -187,8 +192,8 @@ class QGraph extends Logging {
|
|||
* @param edge Graph edge to examine for the previous functions.
|
||||
* @return A list of prior function edges.
|
||||
*/
|
||||
private def previousFunctions(edge: QEdge): List[FunctionEdge] = {
|
||||
var previous = List.empty[FunctionEdge]
|
||||
private def previousFunctions(edge: QEdge): Seq[FunctionEdge] = {
|
||||
var previous = Seq.empty[FunctionEdge]
|
||||
val source = this.jobGraph.getEdgeSource(edge)
|
||||
for (incomingEdge <- this.jobGraph.incomingEdgesOf(source)) {
|
||||
incomingEdge match {
|
||||
|
|
@ -208,8 +213,8 @@ class QGraph extends Logging {
|
|||
* @param edge Graph edge to examine for the next functions.
|
||||
* @return A list of prior function edges.
|
||||
*/
|
||||
private def nextFunctions(edge: QEdge): List[FunctionEdge] = {
|
||||
var next = List.empty[FunctionEdge]
|
||||
private def nextFunctions(edge: QEdge): Seq[FunctionEdge] = {
|
||||
var next = Seq.empty[FunctionEdge]
|
||||
val target = this.jobGraph.getEdgeTarget(edge)
|
||||
for (outgoingEdge <- this.jobGraph.outgoingEdgesOf(target)) {
|
||||
outgoingEdge match {
|
||||
|
|
@ -238,7 +243,7 @@ class QGraph extends Logging {
|
|||
*/
|
||||
private def fillIn() {
|
||||
// clone since edgeSet is backed by the graph
|
||||
asScalaSet(jobGraph.edgeSet).clone.foreach(edge => {
|
||||
asScalaSet(jobGraph.edgeSet).clone().foreach(edge => {
|
||||
if (running) edge match {
|
||||
case cmd: FunctionEdge => {
|
||||
addCollectionOutputs(cmd.outputs)
|
||||
|
|
@ -249,7 +254,7 @@ class QGraph extends Logging {
|
|||
})
|
||||
}
|
||||
|
||||
private def getReadyJobs(): Set[FunctionEdge] = {
|
||||
private def getReadyJobs: Set[FunctionEdge] = {
|
||||
jobGraph.edgeSet.filter{
|
||||
case f: FunctionEdge =>
|
||||
this.previousFunctions(f).forall(_.status == RunnerStatus.DONE) && f.status == RunnerStatus.PENDING
|
||||
|
|
@ -317,33 +322,39 @@ class QGraph extends Logging {
|
|||
|
||||
updateGraphStatus(false)
|
||||
|
||||
var readyJobs = getReadyJobs()
|
||||
var readyJobs = getReadyJobs
|
||||
while (running && readyJobs.size > 0) {
|
||||
logger.debug("+++++++")
|
||||
foreachFunction(readyJobs.toList, edge => {
|
||||
foreachFunction(readyJobs.toSeq, edge => {
|
||||
if (running) {
|
||||
edge.myRunInfo.startTime = new Date()
|
||||
edge.getRunInfo.exechosts = Utils.resolveHostname()
|
||||
logEdge(edge)
|
||||
edge.myRunInfo.doneTime = new Date()
|
||||
edge.markAsDone
|
||||
edge.markAsDone()
|
||||
}
|
||||
})
|
||||
readyJobs = getReadyJobs()
|
||||
readyJobs = getReadyJobs
|
||||
}
|
||||
}
|
||||
|
||||
private def logEdge(edge: FunctionEdge) {
|
||||
logger.info("-------")
|
||||
logger.info("%-8s %s".format(StringUtils.capitalize(edge.status.toString) + ":", edge.function.description))
|
||||
if (logger.isDebugEnabled) {
|
||||
logger.debug("Inputs: " + edge.inputs)
|
||||
logger.debug("Inputs: " + edge.inputs)
|
||||
logger.debug("Outputs: " + edge.outputs)
|
||||
logger.debug("Done+: " + edge.function.doneOutputs.filter(_.exists()))
|
||||
logger.debug("Done-: " + edge.function.doneOutputs.filterNot(_.exists()))
|
||||
logger.debug("CmdDir: " + edge.function.commandDirectory)
|
||||
logger.debug("Temp?: " + edge.function.isIntermediate)
|
||||
logger.debug("Prev: " +
|
||||
(if (edge.resetFromStatus == null) "none" else StringUtils.capitalize(edge.resetFromStatus.toString)) +
|
||||
" (reset = " + (edge.resetFromStatus != null && edge.resetFromStatus != edge.status) + ")" )
|
||||
}
|
||||
logger.info(StringUtils.capitalize(edge.status.toString) + ": " + edge.function.description)
|
||||
if (logger.isDebugEnabled)
|
||||
logger.debug(edge.function.commandDirectory + " > " + edge.function.description)
|
||||
logger.info("Log: " + edge.function.jobOutputFile.getAbsolutePath)
|
||||
logger.info("Log: " + edge.function.jobOutputFile.getAbsolutePath)
|
||||
if (edge.function.jobErrorFile != null)
|
||||
logger.info("Error: " + edge.function.jobErrorFile.getAbsolutePath)
|
||||
logger.info("Error: " + edge.function.jobErrorFile.getAbsolutePath)
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -380,7 +391,7 @@ class QGraph extends Logging {
|
|||
updateGraphStatus(true)
|
||||
|
||||
var readyJobs = TreeSet.empty[FunctionEdge](functionOrdering)
|
||||
readyJobs ++= getReadyJobs()
|
||||
readyJobs ++= getReadyJobs
|
||||
runningJobs = Set.empty[FunctionEdge]
|
||||
var lastRunningCheck = System.currentTimeMillis
|
||||
var logNextStatusCounts = true
|
||||
|
|
@ -407,7 +418,7 @@ class QGraph extends Logging {
|
|||
statusCounts.running += startedJobs.size
|
||||
|
||||
if (logNextStatusCounts)
|
||||
logStatusCounts
|
||||
logStatusCounts()
|
||||
logNextStatusCounts = false
|
||||
|
||||
deleteCleanup(lastRunningCheck)
|
||||
|
|
@ -456,10 +467,10 @@ class QGraph extends Logging {
|
|||
checkRetryJobs(failedJobs)
|
||||
}
|
||||
|
||||
readyJobs ++= getReadyJobs()
|
||||
readyJobs ++= getReadyJobs
|
||||
}
|
||||
|
||||
logStatusCounts
|
||||
logStatusCounts()
|
||||
deleteCleanup(-1)
|
||||
} catch {
|
||||
case e =>
|
||||
|
|
@ -476,7 +487,7 @@ class QGraph extends Logging {
|
|||
private def nextRunningCheck(lastRunningCheck: Long) =
|
||||
((30 * 1000L) - (System.currentTimeMillis - lastRunningCheck))
|
||||
|
||||
private def logStatusCounts {
|
||||
private def logStatusCounts() {
|
||||
logger.info("%d Pend, %d Run, %d Fail, %d Done".format(
|
||||
statusCounts.pending, statusCounts.running, statusCounts.failed, statusCounts.done))
|
||||
}
|
||||
|
|
@ -532,7 +543,8 @@ class QGraph extends Logging {
|
|||
}
|
||||
|
||||
if (edge.status == RunnerStatus.DONE || edge.status == RunnerStatus.SKIPPED) {
|
||||
logger.debug("Already done: " + edge.function.description)
|
||||
if (logger.isDebugEnabled)
|
||||
logEdge(edge)
|
||||
addCleanup(edge)
|
||||
}
|
||||
}
|
||||
|
|
@ -546,12 +558,12 @@ class QGraph extends Logging {
|
|||
}
|
||||
|
||||
/**
|
||||
* Checks if the function should have their outptus removed after they finish running
|
||||
* @param edges Function to check
|
||||
* Checks if the function should have their outputs removed after they finish running
|
||||
* @param edge Function to check
|
||||
*/
|
||||
private def addCleanup(edge: FunctionEdge) {
|
||||
if (!settings.keepIntermediates)
|
||||
if (edge.function.isIntermediate && edge.function.deleteIntermediateOutputs)
|
||||
if (edge.function.isIntermediate)
|
||||
cleanupJobs += edge
|
||||
}
|
||||
|
||||
|
|
@ -601,14 +613,16 @@ class QGraph extends Logging {
|
|||
* From the previous edges, resets any that are marked as skipped to pending.
|
||||
* If those that are reset have skipped edges, those skipped edges are recursively also set
|
||||
* to pending.
|
||||
* Any edges after this edge are also reset to pending.
|
||||
* @param edge Dependent edge.
|
||||
* @param previous Previous edges that provide inputs to edge.
|
||||
* @param cleanOutputs If true will clean up the output files when resetting skipped jobs to pending.
|
||||
* @param cleanOutputs If true will clean up the output files when resetting jobs to pending.
|
||||
*/
|
||||
private def resetPreviousSkipped(edge: FunctionEdge, previous: List[FunctionEdge], cleanOutputs: Boolean) {
|
||||
for (previousEdge <- previous.filter(_.status == RunnerStatus.SKIPPED)) {
|
||||
previousEdge.resetToPending(cleanOutputs)
|
||||
resetPreviousSkipped(previousEdge, this.previousFunctions(previousEdge), cleanOutputs)
|
||||
private def resetPreviousSkipped(edge: FunctionEdge, previous: Seq[FunctionEdge], cleanOutputs: Boolean) {
|
||||
val edges = previous.filter(_.status == RunnerStatus.SKIPPED) ++ this.nextFunctions(edge).filter(_.status != RunnerStatus.PENDING)
|
||||
for (resetEdge <- edges) {
|
||||
resetEdge.resetToPending(cleanOutputs)
|
||||
resetPreviousSkipped(resetEdge, this.previousFunctions(resetEdge), cleanOutputs)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -628,9 +642,9 @@ class QGraph extends Logging {
|
|||
val emailMessage = new EmailMessage
|
||||
emailMessage.from = settings.statusEmailFrom
|
||||
emailMessage.to = settings.statusEmailTo
|
||||
emailMessage.subject = "Queue function: Started: " + settings.qSettings.jobNamePrefix
|
||||
addStartedFunctions(emailMessage, started.toList)
|
||||
emailMessage.trySend(settings.qSettings.emailSettings)
|
||||
emailMessage.subject = "Queue function: Started: " + settings.qSettings.runName
|
||||
addStartedFunctions(emailMessage, started.toSeq)
|
||||
emailMessage.trySend(settings.emailSettings)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -639,9 +653,9 @@ class QGraph extends Logging {
|
|||
val emailMessage = new EmailMessage
|
||||
emailMessage.from = settings.statusEmailFrom
|
||||
emailMessage.to = settings.statusEmailTo
|
||||
emailMessage.subject = "Queue function: Failure: " + settings.qSettings.jobNamePrefix
|
||||
addFailedFunctions(emailMessage, failed.toList)
|
||||
emailMessage.trySend(settings.qSettings.emailSettings)
|
||||
emailMessage.subject = "Queue function: Failure: " + settings.qSettings.runName
|
||||
addFailedFunctions(emailMessage, failed.toSeq)
|
||||
emailMessage.trySend(settings.emailSettings)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -665,7 +679,7 @@ class QGraph extends Logging {
|
|||
|
||||
private def emailStatus() {
|
||||
if (running && settings.statusEmailTo.size > 0) {
|
||||
var failed = List.empty[FunctionEdge]
|
||||
var failed = Seq.empty[FunctionEdge]
|
||||
foreachFunction(edge => {
|
||||
if (edge.status == RunnerStatus.FAILED) {
|
||||
failed :+= edge
|
||||
|
|
@ -677,16 +691,16 @@ class QGraph extends Logging {
|
|||
emailMessage.to = settings.statusEmailTo
|
||||
emailMessage.body = getStatus + nl
|
||||
if (failed.size == 0) {
|
||||
emailMessage.subject = "Queue run: Success: " + settings.qSettings.jobNamePrefix
|
||||
emailMessage.subject = "Queue run: Success: " + settings.qSettings.runName
|
||||
} else {
|
||||
emailMessage.subject = "Queue run: Failure: " + settings.qSettings.jobNamePrefix
|
||||
emailMessage.subject = "Queue run: Failure: " + settings.qSettings.runName
|
||||
addFailedFunctions(emailMessage, failed)
|
||||
}
|
||||
emailMessage.trySend(settings.qSettings.emailSettings)
|
||||
emailMessage.trySend(settings.emailSettings)
|
||||
}
|
||||
}
|
||||
|
||||
private def addStartedFunctions(emailMessage: EmailMessage, started: List[FunctionEdge]) {
|
||||
private def addStartedFunctions(emailMessage: EmailMessage, started: Seq[FunctionEdge]) {
|
||||
if (emailMessage.body == null)
|
||||
emailMessage.body = ""
|
||||
emailMessage.body += """
|
||||
|
|
@ -697,7 +711,7 @@ class QGraph extends Logging {
|
|||
started.map(edge => emailDescription(edge)).mkString(nl+nl))
|
||||
}
|
||||
|
||||
private def addFailedFunctions(emailMessage: EmailMessage, failed: List[FunctionEdge]) {
|
||||
private def addFailedFunctions(emailMessage: EmailMessage, failed: Seq[FunctionEdge]) {
|
||||
val logs = failed.flatMap(edge => logFiles(edge))
|
||||
|
||||
if (emailMessage.body == null)
|
||||
|
|
@ -725,7 +739,7 @@ class QGraph extends Logging {
|
|||
}
|
||||
|
||||
private def logFiles(edge: FunctionEdge) = {
|
||||
var failedOutputs = List.empty[File]
|
||||
var failedOutputs = Seq.empty[File]
|
||||
failedOutputs :+= edge.function.jobOutputFile
|
||||
if (edge.function.jobErrorFile != null)
|
||||
failedOutputs :+= edge.function.jobErrorFile
|
||||
|
|
@ -762,14 +776,14 @@ class QGraph extends Logging {
|
|||
private def getStatus = {
|
||||
val buffer = new StringBuilder
|
||||
doStatus(status => buffer.append(status).append(nl))
|
||||
buffer.toString
|
||||
buffer.toString()
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets job statuses by traversing the graph and looking for status-related files
|
||||
*/
|
||||
private def doStatus(statusFunc: String => Unit) = {
|
||||
var statuses = List.empty[AnalysisStatus]
|
||||
private def doStatus(statusFunc: String => Unit) {
|
||||
var statuses = Seq.empty[AnalysisStatus]
|
||||
var maxWidth = 0
|
||||
foreachFunction(edge => {
|
||||
val name = edge.function.analysisName
|
||||
|
|
@ -860,7 +874,7 @@ class QGraph extends Logging {
|
|||
private def newGraph = new SimpleDirectedGraph[QNode, QEdge](new EdgeFactory[QNode, QEdge] {
|
||||
def createEdge(input: QNode, output: QNode) = new MappingEdge(input, output)})
|
||||
|
||||
private def getQNode(files: List[File]) = {
|
||||
private def getQNode(files: Seq[File]) = {
|
||||
nodeMap.get(files) match {
|
||||
case Some(node) =>
|
||||
node
|
||||
|
|
@ -888,7 +902,7 @@ class QGraph extends Logging {
|
|||
if (inputs.files.size > 1)
|
||||
for (file <- inputs.files) {
|
||||
if (running) {
|
||||
val input = getQNode(List(file))
|
||||
val input = getQNode(Seq(file))
|
||||
if (!jobGraph.containsEdge(input, inputs))
|
||||
addEdge(new MappingEdge(input, inputs))
|
||||
}
|
||||
|
|
@ -903,7 +917,7 @@ class QGraph extends Logging {
|
|||
if (outputs.files.size > 1)
|
||||
for (file <- outputs.files) {
|
||||
if (running) {
|
||||
val output = getQNode(List(file))
|
||||
val output = getQNode(Seq(file))
|
||||
if (!jobGraph.containsEdge(outputs, output))
|
||||
addEdge(new MappingEdge(outputs, output))
|
||||
}
|
||||
|
|
@ -937,37 +951,36 @@ class QGraph extends Logging {
|
|||
|
||||
/**
|
||||
* Utility function for running a method over all function edges.
|
||||
* @param edgeFunction Function to run for each FunctionEdge.
|
||||
* @param f Function to run for each FunctionEdge.
|
||||
*/
|
||||
private def foreachFunction(f: (FunctionEdge) => Unit) {
|
||||
foreachFunction(jobGraph.edgeSet.toList.filter(_.isInstanceOf[FunctionEdge]).asInstanceOf[List[FunctionEdge]], f)
|
||||
foreachFunction(jobGraph.edgeSet.toSeq.filter(_.isInstanceOf[FunctionEdge]).asInstanceOf[Seq[FunctionEdge]], f)
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function for running a method over a list of function edges.
|
||||
* @param edegs Edges to traverse.
|
||||
* @param edgeFunction Function to run for each FunctionEdge.
|
||||
* @param edges Edges to traverse.
|
||||
* @param f Function to run for each FunctionEdge.
|
||||
*/
|
||||
private def foreachFunction(edges: List[FunctionEdge], f: (FunctionEdge) => Unit) {
|
||||
private def foreachFunction(edges: Seq[FunctionEdge], f: (FunctionEdge) => Unit) {
|
||||
edges.sorted(functionOrdering).foreach(edge => if (running) f(edge))
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function for running a method over all function edges.
|
||||
* @param edgeFunction Function to run for each FunctionEdge.
|
||||
* Utility function returning all function edges.
|
||||
*/
|
||||
private def getFunctionEdges: List[FunctionEdge] = {
|
||||
jobGraph.edgeSet.toList.filter(_.isInstanceOf[FunctionEdge]).asInstanceOf[List[FunctionEdge]]
|
||||
private def getFunctionEdges: Seq[FunctionEdge] = {
|
||||
jobGraph.edgeSet.toSeq.filter(_.isInstanceOf[FunctionEdge]).asInstanceOf[Seq[FunctionEdge]]
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function for running a method over all functions, but traversing the nodes in order of dependency.
|
||||
* @param edgeFunction Function to run for each FunctionEdge.
|
||||
* @param f Function to run for each FunctionEdge.
|
||||
*/
|
||||
private def traverseFunctions(f: (FunctionEdge) => Unit) {
|
||||
val iterator = new TopologicalOrderIterator(this.jobGraph)
|
||||
iterator.addTraversalListener(new TraversalListenerAdapter[QNode, QEdge] {
|
||||
override def edgeTraversed(event: EdgeTraversalEvent[QNode, QEdge]) = {
|
||||
override def edgeTraversed(event: EdgeTraversalEvent[QNode, QEdge]) {
|
||||
if (running) {
|
||||
event.getEdge match {
|
||||
case functionEdge: FunctionEdge => f(functionEdge)
|
||||
|
|
@ -980,23 +993,44 @@ class QGraph extends Logging {
|
|||
}
|
||||
|
||||
/**
|
||||
* Outputs the graph to a .dot file.
|
||||
* Outputs the graph to a .gv DOT file.
|
||||
* http://www.graphviz.org/Documentation.php
|
||||
* http://en.wikipedia.org/wiki/DOT_language
|
||||
* @param file Path to output the .dot file.
|
||||
* @param file Path to output the .gv file.
|
||||
*/
|
||||
private def renderToDot(file: java.io.File) {
|
||||
val out = new java.io.FileWriter(file)
|
||||
|
||||
// todo -- we need a nice way to visualize the key pieces of information about commands. Perhaps a
|
||||
// todo -- visualizeString() command, or something that shows inputs / outputs
|
||||
val ve = new org.jgrapht.ext.EdgeNameProvider[QEdge] {
|
||||
def getEdgeName(function: QEdge) = if (function.dotString == null) "" else function.dotString.replace("\"", "\\\"")
|
||||
private def renderGraph(file: java.io.File) {
|
||||
val vertexIDProvider = new org.jgrapht.ext.VertexNameProvider[QNode] {
|
||||
def getVertexName(node: QNode) = node.id.toString
|
||||
}
|
||||
|
||||
//val iterator = new TopologicalOrderIterator(qGraph.jobGraph)
|
||||
(new DOTExporter(new org.jgrapht.ext.IntegerNameProvider[QNode](), null, ve)).export(out, jobGraph)
|
||||
val vertexLabelProvider = new org.jgrapht.ext.VertexNameProvider[QNode] {
|
||||
// The QGraph fills in with single file nodes between nodes that contain more than one file.
|
||||
// We only need to display the single element nodes.
|
||||
def getVertexName(node: QNode) = {
|
||||
if (!node.files.isEmpty && node.files.tail.isEmpty)
|
||||
node.files.head.getName
|
||||
else
|
||||
""
|
||||
}
|
||||
}
|
||||
|
||||
out.close
|
||||
val edgeNameProvider = new org.jgrapht.ext.EdgeNameProvider[QEdge] {
|
||||
def getEdgeName(edge: QEdge) = {
|
||||
if (edge.shortDescription != null)
|
||||
edge.shortDescription.replace("\"", "\\\"")
|
||||
else
|
||||
""
|
||||
}
|
||||
}
|
||||
|
||||
val exporter = new DOTExporter(vertexIDProvider, vertexLabelProvider, edgeNameProvider)
|
||||
|
||||
val out = new OutputStreamWriter(FileUtils.openOutputStream(file))
|
||||
try {
|
||||
exporter.export(out, jobGraph)
|
||||
} finally {
|
||||
IOUtils.closeQuietly(out)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -1054,7 +1088,7 @@ class QGraph extends Logging {
|
|||
*/
|
||||
def isShutdown = !running
|
||||
|
||||
def getFunctionsAndStatus(functions: List[QFunction]): Map[QFunction, JobRunInfo] = {
|
||||
def getFunctionsAndStatus: Map[QFunction, JobRunInfo] = {
|
||||
getFunctionEdges.map(edge => (edge.function, edge.getRunInfo)).toMap
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -26,7 +26,7 @@ package org.broadinstitute.sting.queue.engine
|
|||
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.queue.QSettings
|
||||
import org.broadinstitute.sting.queue.util.SystemUtils
|
||||
import org.broadinstitute.sting.queue.util.{EmailSettings, SystemUtils}
|
||||
import org.broadinstitute.sting.commandline.{Advanced, ArgumentCollection, Argument}
|
||||
|
||||
/**
|
||||
|
|
@ -58,16 +58,16 @@ class QGraphSettings {
|
|||
var keepIntermediates = false
|
||||
|
||||
@Argument(fullName="status_email_to", shortName="statusTo", doc="Email address to send emails to upon completion or on error.", required=false)
|
||||
var statusEmailTo: List[String] = Nil
|
||||
var statusEmailTo: Seq[String] = Nil
|
||||
|
||||
@Argument(fullName="status_email_from", shortName="statusFrom", doc="Email address to send emails from upon completion or on error.", required=false)
|
||||
var statusEmailFrom: String = System.getProperty("user.name") + "@" + SystemUtils.mailName
|
||||
|
||||
@Argument(fullName="dot_graph", shortName="dot", doc="Outputs the queue graph to a .dot file. See: http://en.wikipedia.org/wiki/DOT_language", required=false)
|
||||
var dotFile: File = _
|
||||
@Argument(fullName="graphviz", shortName="gv", doc="Outputs the queue graph to a Graphviz .gv file. See: http://www.graphviz.org/Documentation.php", required=false)
|
||||
var graphvizFile: File = _
|
||||
|
||||
@Argument(fullName="expanded_dot_graph", shortName="expandedDot", doc="Outputs the queue graph of scatter gather to a .dot file. Otherwise overwrites the dot_graph", required=false)
|
||||
var expandedDotFile: File = _
|
||||
@Argument(fullName="graphviz_scatter_gather", shortName="gvsg", doc="Outputs the scatter/gather queue graph to a Graphviz .gv file. Otherwise overwrites the --graphviz file.", required=false)
|
||||
var graphvizScatterGatherFile: File = _
|
||||
|
||||
@Argument(fullName="jobReport", shortName="jobReport", doc="File where we will write the Queue job report", required=false)
|
||||
var jobReportFile: String = _
|
||||
|
|
@ -76,6 +76,9 @@ class QGraphSettings {
|
|||
@Argument(fullName="disableJobReport", shortName="disabpleJobReport", doc="If provided, we will not create a job report", required=false)
|
||||
var disableJobReport: Boolean = false
|
||||
|
||||
@ArgumentCollection
|
||||
val emailSettings = new EmailSettings
|
||||
|
||||
@ArgumentCollection
|
||||
val qSettings = new QSettings
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.engine
|
||||
|
||||
import java.io.File
|
||||
|
|
@ -6,7 +30,7 @@ import java.io.File
|
|||
* Represents a state between QFunctions the directed acyclic QGraph
|
||||
* @param files The list of files that represent this node state ordered by file name.
|
||||
*/
|
||||
class QNode (val id: Int, val files: List[File]) {
|
||||
class QNode (val id: Int, val files: Seq[File]) {
|
||||
override def equals(obj: Any) = {
|
||||
obj match {
|
||||
case other: QNode => this.id == other.id
|
||||
|
|
@ -16,5 +40,5 @@ class QNode (val id: Int, val files: List[File]) {
|
|||
|
||||
override def hashCode = id
|
||||
|
||||
override def toString = files.toString
|
||||
override def toString = files.toString()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -69,7 +69,7 @@ class GridEngineJobRunner(session: Session, function: CommandLineFunction) exten
|
|||
if ( function.nCoresRequest.getOrElse(1) > 1 ) {
|
||||
if ( function.qSettings.dontRequestMultipleCores )
|
||||
logger.warn("Sending multicore job %s to farm without requesting appropriate number of cores (%d)".format(
|
||||
function.jobName, function.nCoresRequest.get))
|
||||
function.shortDescription, function.nCoresRequest.get))
|
||||
else
|
||||
nativeSpec += " -pe %s %d".format(function.qSettings.parallelEnvironmentName, function.nCoresRequest.get)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -35,8 +35,8 @@ import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner
|
|||
import java.util.regex.Pattern
|
||||
import java.lang.StringBuffer
|
||||
import java.util.Date
|
||||
import com.sun.jna.{Pointer, Structure, StringArray, NativeLong}
|
||||
import com.sun.jna.ptr.{PointerByReference, IntByReference}
|
||||
import com.sun.jna.{Structure, StringArray, NativeLong}
|
||||
import com.sun.jna.ptr.IntByReference
|
||||
|
||||
/**
|
||||
* Runs jobs on an LSF compute cluster.
|
||||
|
|
@ -60,7 +60,6 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR
|
|||
|
||||
/**
|
||||
* Dispatches the function on the LSF cluster.
|
||||
* @param function Command to run.
|
||||
*/
|
||||
def start() {
|
||||
Lsf706JobRunner.lsfLibLock.synchronized {
|
||||
|
|
@ -110,7 +109,7 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR
|
|||
if ( function.nCoresRequest.getOrElse(1) > 1 ) {
|
||||
if ( function.qSettings.dontRequestMultipleCores )
|
||||
logger.warn("Sending multicore job %s to farm without requesting appropriate number of cores (%d)".format(
|
||||
function.jobName, function.nCoresRequest.get))
|
||||
function.shortDescription, function.nCoresRequest.get))
|
||||
else {
|
||||
request.numProcessors = function.nCoresRequest.get
|
||||
request.maxNumProcessors = request.numProcessors
|
||||
|
|
@ -298,7 +297,7 @@ object Lsf706JobRunner extends Logging {
|
|||
runner.getRunInfo.doneTime = new Date(jobInfo.endTime.longValue * 1000)
|
||||
val exHostsRaw = jobInfo.exHosts.getStringArray(0)
|
||||
//logger.warn("exHostsRaw = " + exHostsRaw)
|
||||
val exHostsList = exHostsRaw.toList
|
||||
val exHostsList = exHostsRaw.toSeq
|
||||
//logger.warn("exHostsList = " + exHostsList)
|
||||
val exHosts = exHostsList.reduceLeft(_ + "," + _)
|
||||
//logger.warn("exHosts = " + exHosts)
|
||||
|
|
@ -363,7 +362,7 @@ object Lsf706JobRunner extends Logging {
|
|||
/**
|
||||
* Returns the run limit in seconds for the queue.
|
||||
* If the queue name is null returns the length of the default queue.
|
||||
* @param queue Name of the queue or null for the default queue.
|
||||
* @param queueName Name of the queue or null for the default queue.
|
||||
* @return the run limit in seconds for the queue.
|
||||
*/
|
||||
private def getRlimitRun(queueName: String) = {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.runtime.{ProcessSettings, OutputStreamSett
|
|||
|
||||
/**
|
||||
* Runs jobs one at a time locally
|
||||
* @param function Command to run.
|
||||
*/
|
||||
class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRunner {
|
||||
// Controller on the thread that started the job
|
||||
|
|
@ -39,13 +40,12 @@ class ShellJobRunner(val function: CommandLineFunction) extends CommandLineJobRu
|
|||
|
||||
/**
|
||||
* Runs the function on the local shell.
|
||||
* @param function Command to run.
|
||||
*/
|
||||
def start() {
|
||||
val commandLine = Array("sh", jobScript.getAbsolutePath)
|
||||
val stdoutSettings = new OutputStreamSettings
|
||||
val stderrSettings = new OutputStreamSettings
|
||||
val mergeError = (function.jobErrorFile != null)
|
||||
val mergeError = (function.jobErrorFile == null)
|
||||
|
||||
stdoutSettings.setOutputFile(function.jobOutputFile, true)
|
||||
if (function.jobErrorFile != null)
|
||||
|
|
|
|||
|
|
@ -53,6 +53,9 @@ class BamGatherFunction extends GatherFunction with PicardBamFunction {
|
|||
val disableIndex = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME)
|
||||
this.createIndex = Some(!originalGATK.getFieldValue(disableIndex).asInstanceOf[Boolean])
|
||||
|
||||
super.freezeFieldValues
|
||||
val enableMD5 = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME)
|
||||
this.createMD5 = Some(originalGATK.getFieldValue(enableMD5).asInstanceOf[Boolean])
|
||||
|
||||
super.freezeFieldValues()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -32,9 +32,8 @@ import net.sf.samtools.SAMFileHeader
|
|||
import java.util.Collections
|
||||
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocSortedSet, GenomeLocParser}
|
||||
|
||||
case class GATKIntervals(reference: File, intervals: List[String]) {
|
||||
case class GATKIntervals(reference: File, intervals: Seq[String]) {
|
||||
private lazy val referenceDataSource = new ReferenceDataSource(reference)
|
||||
// private var splitsBySize = Map.empty[Int, java.util.List[java.lang.Integer]]
|
||||
|
||||
lazy val samFileHeader = {
|
||||
val header = new SAMFileHeader
|
||||
|
|
@ -53,13 +52,5 @@ case class GATKIntervals(reference: File, intervals: List[String]) {
|
|||
Collections.unmodifiableList(parsedLocs)
|
||||
}
|
||||
|
||||
lazy val contigs = locs.map(_.getContig).distinct.toList
|
||||
|
||||
// def getSplits(size: Int) = {
|
||||
// splitsBySize.getOrElse(size, {
|
||||
// val splits: java.util.List[java.lang.Integer] = IntervalUtils.splitFixedIntervals(locs, size)
|
||||
// splitsBySize += size -> splits
|
||||
// splits
|
||||
// })
|
||||
// }
|
||||
lazy val contigs = locs.map(_.getContig).distinct.toSeq
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -26,7 +26,6 @@ package org.broadinstitute.sting.queue.extensions.gatk
|
|||
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils
|
||||
import java.io.File
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.utils.io.IOUtils
|
||||
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, ScatterFunction}
|
||||
import org.broadinstitute.sting.commandline.Output
|
||||
|
|
@ -39,7 +38,7 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
private final val intervalsStringField = "intervalsString"
|
||||
|
||||
@Output(doc="Scatter function outputs")
|
||||
var scatterOutputFiles: List[File] = Nil
|
||||
var scatterOutputFiles: Seq[File] = Nil
|
||||
|
||||
/** The original GATK function. */
|
||||
protected var originalGATK: CommandLineGATK = _
|
||||
|
|
@ -48,7 +47,7 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
protected var referenceSequence: File = _
|
||||
|
||||
/** The list of interval files ("/path/to/interval.list") or interval strings ("chr1", "chr2") to parse into smaller parts. */
|
||||
protected var intervals: List[String] = Nil
|
||||
protected var intervals: Seq[String] = Nil
|
||||
|
||||
/** Whether the last scatter job should also include any unmapped reads. */
|
||||
protected var includeUnmapped: Boolean = _
|
||||
|
|
@ -57,7 +56,7 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
|
||||
this.referenceSequence = this.originalGATK.reference_sequence
|
||||
if (this.originalGATK.intervals.isEmpty && (this.originalGATK.intervalsString == null || this.originalGATK.intervalsString.isEmpty)) {
|
||||
this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, List.empty[String]).contigs
|
||||
this.intervals ++= GATKScatterFunction.getGATKIntervals(this.referenceSequence, Seq.empty[String]).contigs
|
||||
} else {
|
||||
this.intervals ++= this.originalGATK.intervals.map(_.toString)
|
||||
this.intervals ++= this.originalGATK.intervalsString.filterNot(interval => IntervalUtils.isUnmapped(interval))
|
||||
|
|
@ -70,16 +69,16 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
}
|
||||
|
||||
override def initCloneInputs(cloneFunction: CloneFunction, index: Int) {
|
||||
cloneFunction.setFieldValue(this.intervalsField, List(new File("scatter.intervals")))
|
||||
cloneFunction.setFieldValue(this.intervalsField, Seq(new File("scatter.intervals")))
|
||||
if (index == this.scatterCount && this.includeUnmapped)
|
||||
cloneFunction.setFieldValue(this.intervalsStringField, List("unmapped"))
|
||||
cloneFunction.setFieldValue(this.intervalsStringField, Seq("unmapped"))
|
||||
else
|
||||
cloneFunction.setFieldValue(this.intervalsStringField, List.empty[String])
|
||||
cloneFunction.setFieldValue(this.intervalsStringField, Seq.empty[String])
|
||||
}
|
||||
|
||||
override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) {
|
||||
val scatterPart = cloneFunction.getFieldValue(this.intervalsField)
|
||||
.asInstanceOf[List[File]]
|
||||
.asInstanceOf[Seq[File]]
|
||||
.map(file => IOUtils.absolute(cloneFunction.commandDirectory, file))
|
||||
cloneFunction.setFieldValue(this.intervalsField, scatterPart)
|
||||
this.scatterOutputFiles ++= scatterPart
|
||||
|
|
@ -100,9 +99,9 @@ trait GATKScatterFunction extends ScatterFunction {
|
|||
}
|
||||
|
||||
object GATKScatterFunction {
|
||||
var gatkIntervals = List.empty[GATKIntervals]
|
||||
var gatkIntervals = Seq.empty[GATKIntervals]
|
||||
|
||||
def getGATKIntervals(reference: File, intervals: List[String]) = {
|
||||
def getGATKIntervals(reference: File, intervals: Seq[String]) = {
|
||||
gatkIntervals.find(gi => gi.reference == reference && gi.intervals == intervals) match {
|
||||
case Some(gi) => gi
|
||||
case None =>
|
||||
|
|
|
|||
|
|
@ -1,41 +0,0 @@
|
|||
package org.broadinstitute.sting.queue.extensions.gatk
|
||||
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.utils.io.FileExtension
|
||||
import java.lang.String
|
||||
|
||||
/**
|
||||
* Used to provide -B rodBind arguments to the GATK.
|
||||
*/
|
||||
class RodBind(var trackName: String, var trackType: String, path: String, val tag: String) extends File(path) with FileExtension {
|
||||
def this(trackName: String, trackType: String, path: String) =
|
||||
this(trackName, trackType, path, null)
|
||||
def this(trackName: String, trackType: String, file: File, tag: String) =
|
||||
this(trackName, trackType, file.getPath, tag)
|
||||
def this(trackName: String, trackType: String, file: File) =
|
||||
this(trackName, trackType, file.getPath, null)
|
||||
require(trackName != null, "RodBind trackName cannot be null")
|
||||
require(trackType != null, "RodBind trackType cannot be null")
|
||||
def withPath(newPath: String) = new RodBind(trackName, trackType, newPath, tag)
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to provide -B rodBind arguments to the GATK.
|
||||
*/
|
||||
object RodBind {
|
||||
def apply(trackName: String, trackType: String, path: String, tag: String) = new RodBind(trackName, trackType, path, tag)
|
||||
def apply(trackName: String, trackType: String, path: String) = new RodBind(trackName, trackType, path, null)
|
||||
def apply(trackName: String, trackType: String, file: File, tag: String) = new RodBind(trackName, trackType, file, tag)
|
||||
def apply(trackName: String, trackType: String, file: File) = new RodBind(trackName, trackType, file, null)
|
||||
|
||||
def formatCommandLineParameter( cmdLineParam: String, value: Any ) = {
|
||||
value match {
|
||||
case rodBind: RodBind if (rodBind.tag != null) =>
|
||||
"%s:%s,%s,%s".format(cmdLineParam, rodBind.trackName, rodBind.trackType, rodBind.tag)
|
||||
case rodBind: RodBind =>
|
||||
"%s:%s,%s".format(cmdLineParam, rodBind.trackName, rodBind.trackType)
|
||||
case x =>
|
||||
""
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
|
@ -15,7 +39,7 @@ class AddOrReplaceReadGroups extends org.broadinstitute.sting.queue.function.Jav
|
|||
javaMainClass = "net.sf.picard.sam.AddOrReplaceReadGroups"
|
||||
|
||||
@Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: List[File] = Nil
|
||||
var input: Seq[File] = Nil
|
||||
|
||||
@Output(doc="The output BAM file with the modified/added read groups", shortName = "output", fullName = "output_bam_file", required = true)
|
||||
var output: File = _
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
|
@ -15,7 +39,7 @@ class MarkDuplicates extends org.broadinstitute.sting.queue.function.JavaCommand
|
|||
javaMainClass = "net.sf.picard.sam.MarkDuplicates"
|
||||
|
||||
@Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: List[File] = Nil
|
||||
var input: Seq[File] = Nil
|
||||
|
||||
@Output(doc="The output file to write marked records to", shortName = "output", fullName = "output_bam_file", required = true)
|
||||
var output: File = _
|
||||
|
|
|
|||
|
|
@ -1,9 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.queue.QScript._
|
||||
|
||||
/*
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -16,7 +39,7 @@ class MergeSamFiles extends org.broadinstitute.sting.queue.function.JavaCommandL
|
|||
javaMainClass = "net.sf.picard.sam.MergeSamFiles"
|
||||
|
||||
@Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: List[File] = Nil
|
||||
var input: Seq[File] = Nil
|
||||
|
||||
@Output(doc="The output merged BAM file", shortName = "output", fullName = "output_bam_file", required = true)
|
||||
var output: File = _
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -41,20 +41,22 @@ trait PicardBamFunction extends JavaCommandLineFunction {
|
|||
var sortOrder = SortOrder.coordinate
|
||||
var compressionLevel: Option[Int] = None
|
||||
var createIndex: Option[Boolean] = None
|
||||
var createMD5: Option[Boolean] = None
|
||||
var maxRecordsInRam: Option[Int] = None
|
||||
var assumeSorted: Option[Boolean] = None
|
||||
|
||||
protected def inputBams: List[File]
|
||||
protected def inputBams: Seq[File]
|
||||
protected def outputBam: File
|
||||
|
||||
abstract override def commandLine = super.commandLine +
|
||||
repeat("INPUT=", inputBams, spaceSeparated=false) +
|
||||
required("TMP_DIR=" + jobTempDir) +
|
||||
optional("OUTPUT=", outputBam, spaceSeparated=false) +
|
||||
optional("COMPRESSION_LEVEL=", compressionLevel, spaceSeparated=false) +
|
||||
optional("VALIDATION_STRINGENCY=", validationStringency, spaceSeparated=false) +
|
||||
optional("SO=", sortOrder, spaceSeparated=false) +
|
||||
optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated=false) +
|
||||
optional("ASSUME_SORTED=", assumeSorted, spaceSeparated=false) +
|
||||
optional("CREATE_INDEX=", createIndex, spaceSeparated=false)
|
||||
repeat("INPUT=", inputBams, spaceSeparated=false) +
|
||||
required("TMP_DIR=" + jobTempDir) +
|
||||
optional("OUTPUT=", outputBam, spaceSeparated=false) +
|
||||
optional("COMPRESSION_LEVEL=", compressionLevel, spaceSeparated=false) +
|
||||
optional("VALIDATION_STRINGENCY=", validationStringency, spaceSeparated=false) +
|
||||
optional("SO=", sortOrder, spaceSeparated=false) +
|
||||
optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated=false) +
|
||||
optional("ASSUME_SORTED=", assumeSorted, spaceSeparated=false) +
|
||||
optional("CREATE_INDEX=", createIndex, spaceSeparated=false) +
|
||||
optional("CREATE_MD5_FILE=", createMD5, spaceSeparated=false)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
|
@ -14,7 +38,7 @@ class ReorderSam extends org.broadinstitute.sting.queue.function.JavaCommandLine
|
|||
javaMainClass = "net.sf.picard.sam.ReorderSam"
|
||||
|
||||
@Input(doc="Input file (bam or sam) to extract reads from.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: List[File] = Nil
|
||||
var input: Seq[File] = Nil
|
||||
|
||||
@Output(doc="Output file (bam or sam) to write extracted reads to.", shortName = "output", fullName = "output_bam_file", required = true)
|
||||
var output: File = _
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
|
@ -15,7 +39,7 @@ class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineF
|
|||
javaMainClass = "net.sf.picard.sam.RevertSam"
|
||||
|
||||
@Input(shortName = "input", fullName = "input_bam_files", required = true, doc = "The input SAM or BAM files to revert.")
|
||||
var input: List[File] = Nil
|
||||
var input: Seq[File] = Nil
|
||||
|
||||
@Output(shortName = "output", fullName = "output_bam_file", required = true, doc = "The reverted BAM or SAM output file.")
|
||||
var output: File = _
|
||||
|
|
@ -33,7 +57,7 @@ class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineF
|
|||
var removeAlignmentInformation: Boolean = true
|
||||
|
||||
@Argument(shortName = "atc", fullName = "attributes_to_clear", required = false, doc = "When removing alignment information, the set of optional tags to remove.")
|
||||
var attributesToClear: List[String] = Nil
|
||||
var attributesToClear: Seq[String] = Nil
|
||||
|
||||
@Argument(shortName = "sa", fullName = "sample_alias", required = false, doc = "The sample alias to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same sample alias.")
|
||||
var sampleAlias: String = null
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
|
@ -15,7 +39,7 @@ class SamToFastq extends org.broadinstitute.sting.queue.function.JavaCommandLine
|
|||
javaMainClass = "net.sf.picard.sam.SamToFastq"
|
||||
|
||||
@Input(shortName = "input", fullName = "input_bam_files", required = true, doc = "Input SAM/BAM file to extract reads from.")
|
||||
var input: List[File] = Nil
|
||||
var input: Seq[File] = Nil
|
||||
|
||||
@Output(shortName = "fastq", fullName = "output_fastq_file", required = true, doc = "Output fastq file (single-end fastq or, if paired, first end of the pair fastq).")
|
||||
var fastq: File = _
|
||||
|
|
|
|||
|
|
@ -1,9 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.queue.QScript._
|
||||
|
||||
/*
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -16,7 +39,7 @@ class SortSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFun
|
|||
javaMainClass = "net.sf.picard.sam.SortSam"
|
||||
|
||||
@Input(doc="The input SAM or BAM files to sort.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: List[File] = Nil
|
||||
var input: Seq[File] = Nil
|
||||
|
||||
@Output(doc="The sorted BAM or SAM output file.", shortName = "output", fullName = "output_bam_file", required = true)
|
||||
var output: File = _
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.picard
|
||||
|
||||
import org.broadinstitute.sting.commandline._
|
||||
|
|
@ -17,7 +41,7 @@ class ValidateSamFile extends org.broadinstitute.sting.queue.function.JavaComman
|
|||
javaMainClass = "net.sf.picard.sam.ValidateSamFile"
|
||||
|
||||
@Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
|
||||
var input: List[File] = Nil
|
||||
var input: Seq[File] = Nil
|
||||
|
||||
@Output(doc="Send output to a file instead of stdout", shortName = "output", fullName = "output_file", required = false)
|
||||
var output: File = _
|
||||
|
|
@ -26,7 +50,7 @@ class ValidateSamFile extends org.broadinstitute.sting.queue.function.JavaComman
|
|||
var MODE: Mode = Mode.VERBOSE
|
||||
|
||||
@Argument(doc="List of validation error types to ignore.", shortName = "ignore", fullName = "ignore_error_types", required = false)
|
||||
var IGNORE: List[String] = Nil
|
||||
var IGNORE: Seq[String] = Nil
|
||||
|
||||
@Argument(doc = "The maximum number of lines output in verbose mode.", shortName = "max", fullName = "max_output", required = false)
|
||||
var MAX_OUTPUT: Int = 100
|
||||
|
|
|
|||
|
|
@ -52,6 +52,4 @@ class SamtoolsIndexFunction extends SamtoolsCommandLineFunction {
|
|||
required("index") +
|
||||
required(bamFile) +
|
||||
required(bamFileIndex)
|
||||
|
||||
override def dotString = "Index: %s".format(bamFile.getName)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -34,7 +34,7 @@ class SamtoolsMergeFunction extends SamtoolsCommandLineFunction {
|
|||
analysisName = "samtools merge"
|
||||
|
||||
@Input(doc="BAM file input")
|
||||
var inputBams: List[File] = Nil
|
||||
var inputBams: Seq[File] = Nil
|
||||
|
||||
@Output(doc="BAM file output")
|
||||
var outputBam: File = _
|
||||
|
|
@ -43,10 +43,10 @@ class SamtoolsMergeFunction extends SamtoolsCommandLineFunction {
|
|||
var region: String = _
|
||||
|
||||
@Input(doc="BAM file input indexes")
|
||||
var inputBamIndexes: List[File] = Nil
|
||||
var inputBamIndexes: Seq[File] = Nil
|
||||
|
||||
override def freezeFieldValues = {
|
||||
super.freezeFieldValues
|
||||
override def freezeFieldValues() {
|
||||
super.freezeFieldValues()
|
||||
inputBamIndexes ++= inputBams
|
||||
.filter(orig => orig != null && orig.getName.endsWith(".bam"))
|
||||
.flatMap(orig => Array(
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.function
|
||||
|
||||
import org.broadinstitute.sting.queue.util._
|
||||
|
|
@ -27,13 +51,13 @@ trait CommandLineFunction extends QFunction with Logging {
|
|||
var jobQueue: String = _
|
||||
|
||||
/** Native arguments to pass to the job runner */
|
||||
var jobNativeArgs: List[String] = Nil
|
||||
var jobNativeArgs: Seq[String] = Nil
|
||||
|
||||
/** Native arguments to pass to the job runner */
|
||||
var jobResourceRequests: List[String] = Nil
|
||||
var jobResourceRequests: Seq[String] = Nil
|
||||
|
||||
/** Environment names to pass to the job runner */
|
||||
var jobEnvironmentNames: List[String] = Nil
|
||||
var jobEnvironmentNames: Seq[String] = Nil
|
||||
|
||||
override def copySettingsTo(function: QFunction) {
|
||||
super.copySettingsTo(function)
|
||||
|
|
@ -270,7 +294,7 @@ trait CommandLineFunction extends QFunction with Logging {
|
|||
}
|
||||
|
||||
// Trim leading and trailing whitespace off our three tokens, and unwrap Some(x) to x for the param
|
||||
val trimmedValues : List[String] = List((if ( prefix != null ) prefix.trim else ""),
|
||||
val trimmedValues : Seq[String] = Seq((if ( prefix != null ) prefix.trim else ""),
|
||||
(param match {
|
||||
case Some(x) => paramFormat.format(x).trim
|
||||
case x => paramFormat.format(x).trim
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -24,10 +24,24 @@
|
|||
|
||||
package org.broadinstitute.sting.queue.function
|
||||
|
||||
import java.io.PrintStream
|
||||
|
||||
|
||||
/**
|
||||
* Runs a function in process.
|
||||
*/
|
||||
trait InProcessFunction extends QFunction {
|
||||
analysisName = this.getClass.getSimpleName
|
||||
|
||||
def run()
|
||||
def description = this.getClass.getSimpleName + " " + this.commandOutputs.mkString(" ")
|
||||
|
||||
/**
|
||||
* During run() this stream will write to the stdout.
|
||||
*/
|
||||
var jobOutputStream: PrintStream = null
|
||||
|
||||
/**
|
||||
* Write errors to this stream run().
|
||||
*/
|
||||
var jobErrorStream: PrintStream = null
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -42,7 +42,7 @@ trait JavaCommandLineFunction extends CommandLineFunction {
|
|||
* Class path for the main class.
|
||||
* Defaults to the current classpath.
|
||||
*/
|
||||
var javaClasspath: List[String] = Nil
|
||||
var javaClasspath: Seq[String] = Nil
|
||||
|
||||
/**
|
||||
* Memory limit for the java executable, or if None will use the default memoryLimit.
|
||||
|
|
@ -82,5 +82,5 @@ trait JavaCommandLineFunction extends CommandLineFunction {
|
|||
|
||||
object JavaCommandLineFunction {
|
||||
val currentClasspath = System.getProperty("java.class.path")
|
||||
.split(File.pathSeparatorChar).map(path => IOUtils.absolute(new File(path)).getPath).toList
|
||||
.split(File.pathSeparatorChar).map(path => IOUtils.absolute(new File(path)).getPath).toSeq
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.function
|
||||
|
||||
import org.broadinstitute.sting.commandline.{Input, Output}
|
||||
|
|
@ -9,10 +33,12 @@ import org.apache.commons.io.IOUtils
|
|||
* Custom formats can override addFile.
|
||||
*/
|
||||
class ListWriterFunction extends InProcessFunction {
|
||||
@Input(doc="input files") var inputFiles: List[File] = Nil
|
||||
analysisName = "WriteList"
|
||||
|
||||
@Input(doc="input files") var inputFiles: Seq[File] = Nil
|
||||
@Output(doc="output file") var listFile: File = _
|
||||
|
||||
def run {
|
||||
def run() {
|
||||
val writer = new PrintWriter(listFile)
|
||||
try {
|
||||
for (inputFile <- inputFiles)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -29,7 +29,7 @@ import java.lang.annotation.Annotation
|
|||
import org.broadinstitute.sting.commandline._
|
||||
import org.broadinstitute.sting.queue.{QException, QSettings}
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction
|
||||
import java.lang.IllegalStateException
|
||||
import org.broadinstitute.sting.queue.util._
|
||||
import org.broadinstitute.sting.utils.io.IOUtils
|
||||
|
||||
|
|
@ -39,13 +39,18 @@ import org.broadinstitute.sting.utils.io.IOUtils
|
|||
* Inputs are matched to other outputs by using .equals()
|
||||
*/
|
||||
trait QFunction extends Logging with QJobReport {
|
||||
/** A short description of this step in the graph */
|
||||
/**
|
||||
* A short description of what this class of function does.
|
||||
* By default does not include the output specific to this function.
|
||||
* See shortDescription for a description of what this instance of the function outputs.
|
||||
*/
|
||||
var analysisName: String = "<function>"
|
||||
|
||||
/** Prefix for automatic job name creation */
|
||||
var jobNamePrefix: String = _
|
||||
|
||||
/** The name name of the job */
|
||||
/**
|
||||
* The name name of the job, must be file system safe and unique to the graph.
|
||||
* Defaults to "runName-<order_function_added>".
|
||||
* Use shortDescription for an alternative that is display friendly.
|
||||
*/
|
||||
var jobName: String = _
|
||||
|
||||
/** Default settings */
|
||||
|
|
@ -58,7 +63,7 @@ trait QFunction extends Logging with QJobReport {
|
|||
var jobTempDir: File = null
|
||||
|
||||
/** Order the function was added to the graph. */
|
||||
var addOrder: List[Int] = Nil
|
||||
var addOrder: Seq[Int] = Nil
|
||||
|
||||
/** Job priority */
|
||||
var jobPriority: Option[Int] = None
|
||||
|
|
@ -78,12 +83,6 @@ trait QFunction extends Logging with QJobReport {
|
|||
*/
|
||||
var isIntermediate = false
|
||||
|
||||
/**
|
||||
* If true and isIntermediate is true, the files listed
|
||||
* via outputs will deleted after the command completes.
|
||||
*/
|
||||
var deleteIntermediateOutputs = true
|
||||
|
||||
// -------------------------------------------------------
|
||||
//
|
||||
// job run information
|
||||
|
|
@ -95,8 +94,6 @@ trait QFunction extends Logging with QJobReport {
|
|||
* @param function QFunction to copy values to.
|
||||
*/
|
||||
override def copySettingsTo(function: QFunction) {
|
||||
function.analysisName = this.analysisName
|
||||
function.jobName = this.jobName
|
||||
function.qSettings = this.qSettings
|
||||
function.commandDirectory = this.commandDirectory
|
||||
function.jobTempDir = this.jobTempDir
|
||||
|
|
@ -105,79 +102,80 @@ trait QFunction extends Logging with QJobReport {
|
|||
function.jobRestartable = this.jobRestartable
|
||||
function.updateJobRun = this.updateJobRun
|
||||
function.isIntermediate = this.isIntermediate
|
||||
function.deleteIntermediateOutputs = this.deleteIntermediateOutputs
|
||||
function.reportGroup = this.reportGroup
|
||||
function.reportFeatures = this.reportFeatures
|
||||
}
|
||||
|
||||
/** File to redirect any output. Defaults to <jobName>.out */
|
||||
@Output(doc="File to redirect any output", required=false)
|
||||
@Gather(classOf[SimpleTextGatherFunction])
|
||||
var jobOutputFile: File = _
|
||||
|
||||
/** File to redirect any errors. Defaults to <jobName>.out */
|
||||
@Output(doc="File to redirect any errors", required=false)
|
||||
@Gather(classOf[SimpleTextGatherFunction])
|
||||
var jobErrorFile: File = _
|
||||
|
||||
/**
|
||||
* Description of this command line function.
|
||||
*/
|
||||
def description: String
|
||||
def description: String = "%s: %s > %s".format(analysisName, inputs, outputs)
|
||||
|
||||
/**
|
||||
* The function description in .dot files
|
||||
* A short description of the function.
|
||||
*/
|
||||
def dotString = jobName + " => " + description
|
||||
def shortDescription = {
|
||||
firstOutput match {
|
||||
case file: File => analysisName + ": " + file.getName
|
||||
case _ => analysisName
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the function is done, false if it's
|
||||
* not done and None if the done status is unknown.
|
||||
* Returns true if the function is done.
|
||||
*/
|
||||
def isDone = {
|
||||
def isDone: Boolean = {
|
||||
val files = doneOutputs
|
||||
if (files.size == 0)
|
||||
None
|
||||
else
|
||||
Some(files.forall(_.exists))
|
||||
throw new IllegalStateException("Function should have at least one output: " + analysisName)
|
||||
files.forall(_.exists)
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the function has failed, false if it
|
||||
* has not failed and None if the fail status is unknown.
|
||||
* Returns true if the function has failed.
|
||||
*/
|
||||
def isFail = {
|
||||
def isFail: Boolean = {
|
||||
val files = failOutputs
|
||||
if (files.size == 0)
|
||||
None
|
||||
else
|
||||
Some(files.exists(_.exists))
|
||||
throw new IllegalStateException("Function should have at least one output: " + analysisName)
|
||||
files.exists(_.exists)
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the file is a log file for this function.
|
||||
* Returns files to track for hidden done/fail files.
|
||||
* @return Seq[String] files.
|
||||
*/
|
||||
protected def isLogFile(file: File) =
|
||||
file == jobOutputFile || file == jobErrorFile
|
||||
protected def statusPaths = {
|
||||
var paths = outputs
|
||||
paths :+= jobOutputFile
|
||||
if (jobErrorFile != null)
|
||||
paths :+= jobErrorFile
|
||||
paths
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns prefixes for hidden done/fail files.
|
||||
* @return prefixes.
|
||||
*/
|
||||
private def statusPrefixes = statusPaths.map(file => file.getParentFile + "/." + file.getName)
|
||||
|
||||
/**
|
||||
* Returns the output files for this function.
|
||||
* @return Set[File] outputs for this function.
|
||||
* @return outputs for this function.
|
||||
*/
|
||||
private def statusPaths =
|
||||
commandOutputs.map(file => file.getParentFile + "/." + file.getName)
|
||||
|
||||
/**
|
||||
* Returns the output files for this function.
|
||||
* @return Set[File] outputs for this function.
|
||||
*/
|
||||
def doneOutputs = statusPaths.map(path => new File(path + ".done"))
|
||||
def doneOutputs: Seq[File] = statusPrefixes.map(path => new File(path + ".done"))
|
||||
|
||||
/**
|
||||
* Returns the output files for this function.
|
||||
* @return Set[File] outputs for this function.
|
||||
* @return outputs for this function.
|
||||
*/
|
||||
def failOutputs = statusPaths.map(path => new File(path + ".fail"))
|
||||
def failOutputs: Seq[File] = statusPrefixes.map(path => new File(path + ".fail"))
|
||||
|
||||
/** The complete list of fields on this CommandLineFunction. */
|
||||
def functionFields = QFunction.classFields(this.functionFieldClass).functionFields
|
||||
|
|
@ -195,21 +193,21 @@ trait QFunction extends Logging with QJobReport {
|
|||
|
||||
/**
|
||||
* Returns the input files for this function.
|
||||
* @return Set[File] inputs for this function.
|
||||
* @return inputs for this function.
|
||||
*/
|
||||
def inputs = getFieldFiles(inputFields)
|
||||
def inputs: Seq[File] = getFieldFiles(inputFields)
|
||||
|
||||
/**
|
||||
* Returns the output files for this function.
|
||||
* @return Set[File] outputs for this function.
|
||||
* @return outputs for this function.
|
||||
*/
|
||||
def outputs = getFieldFiles(outputFields)
|
||||
def outputs: Seq[File] = getFieldFiles(outputFields)
|
||||
|
||||
/**
|
||||
* Returns the non-log outputs for this function.
|
||||
* @return the non-log outputs for this function.
|
||||
* Returns the first output file.
|
||||
* @return first output for this function.
|
||||
*/
|
||||
def commandOutputs = outputs.filterNot(file => isLogFile(file))
|
||||
def firstOutput: File = outputs.headOption.getOrElse(null)
|
||||
|
||||
/**
|
||||
* Returns the set of directories where files may be written.
|
||||
|
|
@ -218,6 +216,9 @@ trait QFunction extends Logging with QJobReport {
|
|||
var dirs = Set.empty[File]
|
||||
dirs += commandDirectory
|
||||
dirs += jobTempDir
|
||||
dirs += jobOutputFile.getParentFile
|
||||
if (jobErrorFile != null)
|
||||
dirs += jobErrorFile.getParentFile
|
||||
dirs ++= outputs.map(_.getParentFile)
|
||||
dirs
|
||||
}
|
||||
|
|
@ -235,7 +236,7 @@ trait QFunction extends Logging with QJobReport {
|
|||
* Deletes the output files and all the status files for this function.
|
||||
*/
|
||||
def deleteOutputs() {
|
||||
commandOutputs.foreach(file => IOUtils.tryDelete(file))
|
||||
outputs.foreach(file => IOUtils.tryDelete(file))
|
||||
doneOutputs.foreach(file => IOUtils.tryDelete(file))
|
||||
failOutputs.foreach(file => IOUtils.tryDelete(file))
|
||||
}
|
||||
|
|
@ -252,63 +253,63 @@ trait QFunction extends Logging with QJobReport {
|
|||
|
||||
/**
|
||||
* Returns fields that do not have values which are required.
|
||||
* @return List[String] names of fields missing values.
|
||||
* @return Seq[String] names of fields missing values.
|
||||
*/
|
||||
def missingFields: List[String] = {
|
||||
def missingFields: Seq[String] = {
|
||||
val missingInputs = missingFields(inputFields, classOf[Input])
|
||||
val missingOutputs = missingFields(outputFields, classOf[Output])
|
||||
val missingArguments = missingFields(argumentFields, classOf[Argument])
|
||||
(missingInputs | missingOutputs | missingArguments).toList.sorted
|
||||
(missingInputs ++ missingOutputs ++ missingArguments).distinct.sorted
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns fields that do not have values which are required.
|
||||
* @param sources Fields to check.
|
||||
* @param annotation Annotation.
|
||||
* @return Set[String] names of fields missing values.
|
||||
* @return names of fields missing values.
|
||||
*/
|
||||
private def missingFields(sources: List[ArgumentSource], annotation: Class[_ <: Annotation]): Set[String] = {
|
||||
var missing = Set.empty[String]
|
||||
private def missingFields(sources: Seq[ArgumentSource], annotation: Class[_ <: Annotation]): Seq[String] = {
|
||||
var missing: Seq[String] = Nil
|
||||
for (source <- sources) {
|
||||
if (isRequired(source, annotation))
|
||||
if (!hasFieldValue(source))
|
||||
if (!exclusiveOf(source, annotation).exists(otherSource => hasFieldValue(otherSource)))
|
||||
missing += "@%s: %s - %s".format(annotation.getSimpleName, source.field.getName, doc(source, annotation))
|
||||
missing :+= "@%s: %s - %s".format(annotation.getSimpleName, source.field.getName, doc(source, annotation))
|
||||
}
|
||||
missing
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the files from the fields. The fields must be a File, a FileExtension, or a List or Set of either.
|
||||
* Gets the files from the fields. The fields must be a File, a FileExtension, or a Seq or Set of either.
|
||||
* @param fields Fields to get files.
|
||||
* @return Set[File] for the fields.
|
||||
* @return for the fields.
|
||||
*/
|
||||
private def getFieldFiles(fields: List[ArgumentSource]): Set[File] = {
|
||||
var files = Set.empty[File]
|
||||
private def getFieldFiles(fields: Seq[ArgumentSource]): Seq[File] = {
|
||||
var files: Seq[File] = Nil
|
||||
for (field <- fields)
|
||||
files ++= getFieldFiles(field)
|
||||
files
|
||||
files.distinct
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the files from the field. The field must be a File, a FileExtension, or a List or Set of either.
|
||||
* @param fields Field to get files.
|
||||
* @return Set[File] for the field.
|
||||
* Gets the files from the field. The field must be a File, a FileExtension, or a Seq or Set of either.
|
||||
* @param field Field to get files.
|
||||
* @return for the field.
|
||||
*/
|
||||
def getFieldFiles(field: ArgumentSource): Set[File] = {
|
||||
var files = Set.empty[File]
|
||||
def getFieldFiles(field: ArgumentSource): Seq[File] = {
|
||||
var files: Seq[File] = Nil
|
||||
CollectionUtils.foreach(getFieldValue(field), (fieldValue) => {
|
||||
val file = fieldValueToFile(field, fieldValue)
|
||||
if (file != null)
|
||||
files += file
|
||||
files :+= file
|
||||
})
|
||||
files
|
||||
files.distinct
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the file from the field. The field must be a File or a FileExtension and not a List or Set.
|
||||
* Gets the file from the field. The field must be a File or a FileExtension and not a Seq or Set.
|
||||
* @param field Field to get the file.
|
||||
* @return File for the field.
|
||||
* @return for the field.
|
||||
*/
|
||||
def getFieldFile(field: ArgumentSource): File =
|
||||
fieldValueToFile(field, getFieldValue(field))
|
||||
|
|
@ -340,14 +341,15 @@ trait QFunction extends Logging with QJobReport {
|
|||
* Sets all field values.
|
||||
*/
|
||||
def freezeFieldValues() {
|
||||
if (jobNamePrefix == null)
|
||||
jobNamePrefix = qSettings.jobNamePrefix
|
||||
|
||||
if (jobName == null)
|
||||
jobName = QFunction.nextJobName(jobNamePrefix)
|
||||
jobName = qSettings.runName + "-" + this.addOrder.mkString("-")
|
||||
|
||||
if (jobOutputFile == null)
|
||||
jobOutputFile = new File(jobName + ".out")
|
||||
if (jobOutputFile == null) {
|
||||
jobOutputFile = firstOutput match {
|
||||
case file: File => new File(file.getParentFile, file.getName + ".out")
|
||||
case _ => new File(jobName + ".out")
|
||||
}
|
||||
}
|
||||
|
||||
if (jobTempDir == null)
|
||||
jobTempDir = qSettings.tempDirectory
|
||||
|
|
@ -378,6 +380,10 @@ trait QFunction extends Logging with QJobReport {
|
|||
fieldValue = CollectionUtils.updated(fieldValue, canon).asInstanceOf[AnyRef]
|
||||
this.setFieldValue(field, fieldValue)
|
||||
}
|
||||
|
||||
this.jobOutputFile = canon(this.jobOutputFile).asInstanceOf[File]
|
||||
if (this.jobErrorFile != null)
|
||||
this.jobErrorFile = canon(this.jobErrorFile).asInstanceOf[File]
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -443,7 +449,7 @@ trait QFunction extends Logging with QJobReport {
|
|||
|
||||
/**
|
||||
* Returns false if the value is null or an empty collection.
|
||||
* @param value Value to test for null, or a collection to test if it is empty.
|
||||
* @param param Value to test for null, or a collection to test if it is empty.
|
||||
* @return false if the value is null, or false if the collection is empty, otherwise true.
|
||||
*/
|
||||
protected def hasValue(param: Any) = CollectionUtils.isNotNullOrNotEmpty(param)
|
||||
|
|
@ -472,28 +478,15 @@ trait QFunction extends Logging with QJobReport {
|
|||
}
|
||||
|
||||
object QFunction {
|
||||
/** Job index counter for this run of Queue. */
|
||||
private var jobIndex = 0
|
||||
|
||||
var parsingEngine: ParsingEngine = _
|
||||
|
||||
/**
|
||||
* Returns the next job name using the prefix.
|
||||
* @param prefix Prefix of the job name.
|
||||
* @return the next job name.
|
||||
*/
|
||||
private def nextJobName(prefix: String) = {
|
||||
jobIndex += 1
|
||||
prefix + "-" + jobIndex
|
||||
}
|
||||
|
||||
/**
|
||||
* The list of fields defined on a class
|
||||
* @param clazz The class to lookup fields.
|
||||
*/
|
||||
private class ClassFields(clazz: Class[_]) {
|
||||
/** The complete list of fields on this CommandLineFunction. */
|
||||
val functionFields: List[ArgumentSource] = parsingEngine.extractArgumentSources(clazz).toList
|
||||
val functionFields: Seq[ArgumentSource] = parsingEngine.extractArgumentSources(clazz).toSeq
|
||||
/** The @Input fields on this CommandLineFunction. */
|
||||
val inputFields = functionFields.filter(source => ReflectionUtils.hasAnnotation(source.field, classOf[Input]))
|
||||
/** The @Output fields on this CommandLineFunction. */
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -25,7 +25,6 @@
|
|||
package org.broadinstitute.sting.queue.function.scattergather
|
||||
|
||||
import org.broadinstitute.sting.commandline.ArgumentSource
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.queue.function.{QFunction, CommandLineFunction}
|
||||
|
||||
/**
|
||||
|
|
@ -62,9 +61,8 @@ class CloneFunction extends CommandLineFunction {
|
|||
}
|
||||
}
|
||||
|
||||
override def commandOutputs = withScatterPart(() => originalFunction.commandOutputs)
|
||||
override def dotString = withScatterPart(() => originalFunction.dotString)
|
||||
override def description = withScatterPart(() => originalFunction.description)
|
||||
override def shortDescription = withScatterPart(() => originalFunction.shortDescription)
|
||||
override protected def functionFieldClass = originalFunction.getClass
|
||||
|
||||
def commandLine = withScatterPart(() => originalFunction.commandLine)
|
||||
|
|
@ -75,30 +73,22 @@ class CloneFunction extends CommandLineFunction {
|
|||
}
|
||||
|
||||
override def getFieldValue(source: ArgumentSource): AnyRef = {
|
||||
source.field.getName match {
|
||||
case "jobOutputFile" => jobOutputFile
|
||||
case "jobErrorFile" => jobErrorFile
|
||||
case _ => overriddenFields.get(source) match {
|
||||
case Some(value) => value.asInstanceOf[AnyRef]
|
||||
case None => {
|
||||
val value = originalFunction.getFieldValue(source)
|
||||
overriddenFields += source -> value
|
||||
value
|
||||
}
|
||||
overriddenFields.get(source) match {
|
||||
case Some(value) => value.asInstanceOf[AnyRef]
|
||||
case None => {
|
||||
val value = originalFunction.getFieldValue(source)
|
||||
overriddenFields += source -> value
|
||||
value
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def setFieldValue(field: String, value: Any): Unit = {
|
||||
def setFieldValue(field: String, value: Any) {
|
||||
val source = QFunction.findField(originalFunction.getClass, field)
|
||||
setFieldValue(source, value)
|
||||
}
|
||||
|
||||
override def setFieldValue(source: ArgumentSource, value: Any): Unit = {
|
||||
source.field.getName match {
|
||||
case "jobOutputFile" => jobOutputFile = value.asInstanceOf[File]
|
||||
case "jobErrorFile" => jobErrorFile = value.asInstanceOf[File]
|
||||
case _ => overriddenFields += source -> value
|
||||
}
|
||||
override def setFieldValue(source: ArgumentSource, value: Any) {
|
||||
overriddenFields += source -> value
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -22,15 +22,34 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.gatk
|
||||
package org.broadinstitute.sting.queue.function.scattergather
|
||||
|
||||
import org.broadinstitute.sting.queue.function.scattergather.GatherFunction
|
||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||
import org.broadinstitute.sting.queue.QException
|
||||
import org.broadinstitute.sting.commandline.Input
|
||||
import org.apache.commons.io.FileUtils
|
||||
import java.io.File
|
||||
import collection.JavaConversions._
|
||||
|
||||
/**
|
||||
* A no-op for index files that were automatically generated during the gather step.
|
||||
* TODO: Allow graph to know that this isn't needed, and/or that one gather job can actually gather N-outputs, and/or look more into generic source->sinks.
|
||||
* Concatenate log files to the jobOutputFile.
|
||||
*/
|
||||
class AutoIndexGatherFunction extends InProcessFunction with GatherFunction {
|
||||
def run() {}
|
||||
class ConcatenateLogsFunction extends InProcessFunction {
|
||||
analysisName = "Concat"
|
||||
|
||||
@Input(doc="Parts to gather back into the original output")
|
||||
var logs: Seq[File] = Nil
|
||||
|
||||
override def description = "%s: %s > %s".format(analysisName, logs, jobOutputFile)
|
||||
override def shortDescription = analysisName + ": " + jobOutputFile.getName
|
||||
|
||||
def run() {
|
||||
val missing = org.broadinstitute.sting.utils.io.IOUtils.waitFor(logs, 120)
|
||||
if (!missing.isEmpty)
|
||||
throw new QException("Unable to find log: " + missing.mkString(", "))
|
||||
logs.foreach(log => {
|
||||
FileUtils.copyFile(log, this.jobOutputStream)
|
||||
this.jobOutputStream.println()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.function.scattergather
|
||||
|
||||
import java.io.File
|
||||
|
|
@ -11,22 +35,31 @@ import collection.JavaConversions._
|
|||
* Base class for Gather command line functions.
|
||||
*/
|
||||
trait GatherFunction extends QFunction {
|
||||
analysisName = "Gather"
|
||||
|
||||
var originalFunction: ScatterGatherableFunction = _
|
||||
|
||||
@Output(doc="The original output of the scattered function")
|
||||
var originalOutput: File = _
|
||||
|
||||
@Input(doc="Parts to gather back into the original output")
|
||||
var gatherParts: List[File] = Nil
|
||||
|
||||
@Input(doc="Other log files that will be gathered before this output", required=false)
|
||||
var originalLogFiles: List[File] = Nil
|
||||
var gatherParts: Seq[File] = Nil
|
||||
|
||||
/**
|
||||
* Called to initialize the gather function values after all other values have been setup for this function.
|
||||
*/
|
||||
def init() {}
|
||||
|
||||
/**
|
||||
* Don't include this @Gather's log file when tracking .done.
|
||||
* The done files for original log file being produced will do.
|
||||
*
|
||||
* The logs from the scatter/gather jobs are concatenated together into the original log.
|
||||
* Without removing the logs a .done file would be created for the logs. If a SGF is switched
|
||||
* from scatterCount=1 to >1 then this Gather would be "missing" its logs and re-run.
|
||||
*/
|
||||
override protected def statusPaths = outputs
|
||||
|
||||
/**
|
||||
* Waits for gather parts to propagate over NFS or throws an exception.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -32,11 +32,12 @@ import collection.JavaConversions._
|
|||
* Runs a Gatherer in process.
|
||||
*/
|
||||
class GathererFunction(gathererClass: Class[_ <: Gatherer]) extends InProcessFunction with GatherFunction {
|
||||
analysisName = this.gathererClass.getSimpleName
|
||||
|
||||
def run() {
|
||||
val gatherer = gathererClass.newInstance
|
||||
if (gatherer.waitForInputs)
|
||||
waitForGatherParts
|
||||
waitForGatherParts()
|
||||
gatherer.gather(this.gatherParts, this.originalOutput)
|
||||
}
|
||||
override def description = this.gathererClass.getSimpleName + " " + this.commandOutputs.mkString(" ")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,14 +32,17 @@ import org.broadinstitute.sting.queue.function.QFunction
|
|||
* Base class for Scatter functions.
|
||||
*/
|
||||
trait ScatterFunction extends QFunction {
|
||||
analysisName = "Scatter"
|
||||
|
||||
var originalFunction: ScatterGatherableFunction = _
|
||||
|
||||
@Input(doc="Original inputs to scatter")
|
||||
var originalInputs: Set[File] = _
|
||||
|
||||
override def shortDescription = analysisName + ": %s ...".format(firstOutput.getName)
|
||||
|
||||
/**
|
||||
* Called to initialize scatter function values after all other values have been setup for this function.
|
||||
* @param originalFunction The original function to with inputs bind to this scatter function.
|
||||
*/
|
||||
def init() {}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.commandline.{Gatherer, Gather, ArgumentSource}
|
|||
import org.broadinstitute.sting.queue.function.{QFunction, CommandLineFunction}
|
||||
import org.broadinstitute.sting.queue.QException
|
||||
import org.broadinstitute.sting.utils.io.IOUtils
|
||||
import collection.immutable.ListMap
|
||||
|
||||
/**
|
||||
* A function that can be run faster by splitting it up into pieces and then joining together the results.
|
||||
|
|
@ -47,28 +48,28 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
|||
|
||||
/**
|
||||
* Function that returns the class to use for gathering a directory. If it returns null then @Gather annotation will be used.
|
||||
* @param gatherField Field that is to be gathered.
|
||||
* PartialFunction param gatherField Field that is to be gathered.
|
||||
* @return The class of the GatherFunction to be used or null.
|
||||
*/
|
||||
var gatherClass: PartialFunction[ArgumentSource, Class[_ <: GatherFunction]] = _
|
||||
|
||||
/**
|
||||
* Allows external modification of the ScatterFunction that will create the scatter pieces in the temporary directories.
|
||||
* @param scatterFunction The function that will create the scatter pieces in the temporary directories.
|
||||
* PartialFunction param scatterFunction The function that will create the scatter pieces in the temporary directories.
|
||||
*/
|
||||
var setupScatterFunction: PartialFunction[ScatterFunction, Unit] = _
|
||||
|
||||
/**
|
||||
* Allows external modification of the GatherFunction that will collect the gather pieces in the temporary directories.
|
||||
* @param gatherFunction The function that will merge the gather pieces from the temporary directories.
|
||||
* @param gatherField The output field being gathered.
|
||||
* PartialFunction param gatherFunction The function that will merge the gather pieces from the temporary directories.
|
||||
* PartialFunction param gatherField The output field being gathered.
|
||||
*/
|
||||
var setupGatherFunction: PartialFunction[(GatherFunction, ArgumentSource), Unit] = _
|
||||
|
||||
/**
|
||||
* Allows external modification of the cloned function.
|
||||
* @param cloneFunction A clone wrapper of this ScatterGatherableFunction
|
||||
* @param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
|
||||
* PartialFunction param cloneFunction A clone wrapper of this ScatterGatherableFunction
|
||||
* PartialFunction param index The one based index (from 1..scatterCount inclusive) of the scatter piece.
|
||||
*/
|
||||
var setupCloneFunction: PartialFunction[(CloneFunction, Int), Unit] = _
|
||||
|
||||
|
|
@ -108,8 +109,9 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
|||
scatterFunction.originalFunction = this
|
||||
scatterFunction.originalInputs = inputFiles
|
||||
scatterFunction.commandDirectory = this.scatterGatherTempDir("scatter")
|
||||
scatterFunction.isIntermediate = true
|
||||
scatterFunction.jobOutputFile = new File("scatter.out")
|
||||
scatterFunction.addOrder = this.addOrder :+ 1
|
||||
scatterFunction.isIntermediate = true
|
||||
|
||||
initScatterFunction(scatterFunction)
|
||||
scatterFunction.absoluteCommandDirectory()
|
||||
|
|
@ -121,69 +123,61 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
|||
* Returns a list of scatter / gather and clones of this function
|
||||
* that can be run in parallel to produce the same output as this
|
||||
* command line function.
|
||||
* @return List[QFunction] to run instead of this function.
|
||||
* @return Seq[QFunction] to run instead of this function.
|
||||
*/
|
||||
def generateFunctions() = {
|
||||
var functions = List.empty[QFunction]
|
||||
|
||||
// Only gather up fields that will have a value
|
||||
val outputFieldsWithValues = this.outputFields.filter(hasFieldValue(_))
|
||||
|
||||
// Create the scatter function based on @Scatter
|
||||
functions :+= scatterFunction
|
||||
|
||||
// Ask the scatter function how many clones to create.
|
||||
val numClones = scatterFunction.scatterCount
|
||||
|
||||
// List of the log files that are output by this function.
|
||||
var logFiles = List(this.jobOutputFile)
|
||||
if (this.jobErrorFile != null)
|
||||
logFiles :+= this.jobErrorFile
|
||||
|
||||
// Create the gather functions for each output field
|
||||
var gatherFunctions = Map.empty[ArgumentSource, GatherFunction]
|
||||
var gatherOutputs = Map.empty[ArgumentSource, File]
|
||||
var gatherFunctions = ListMap.empty[ArgumentSource, GatherFunction]
|
||||
var gatherOutputs = ListMap.empty[ArgumentSource, File]
|
||||
var gatherAddOrder = numClones + 2
|
||||
|
||||
// Only track fields that will have a value
|
||||
val outputFieldsWithValues = this.outputFields.filter(hasFieldValue(_))
|
||||
|
||||
for (gatherField <- outputFieldsWithValues) {
|
||||
val gatherOutput = getFieldFile(gatherField)
|
||||
gatherOutputs += gatherField -> getFieldFile(gatherField)
|
||||
}
|
||||
|
||||
// Only gather fields that are @Gather(enabled=true)
|
||||
val outputFieldsWithGathers = outputFieldsWithValues.filter(hasGatherFunction(_))
|
||||
|
||||
for (gatherField <- outputFieldsWithGathers) {
|
||||
val gatherOutput = gatherOutputs(gatherField)
|
||||
|
||||
val gatherFunction = this.newGatherFunction(gatherField)
|
||||
this.copySettingsTo(gatherFunction)
|
||||
gatherFunction.originalFunction = this
|
||||
gatherFunction.originalOutput = gatherOutput
|
||||
gatherFunction.commandDirectory = this.scatterGatherTempDir("gather-" + gatherField.field.getName)
|
||||
// If this is a gather for a log file, make the gather intermediate just in case the log file name changes
|
||||
// Otherwise have the regular output function wait on the log files to gather
|
||||
if (isLogFile(gatherOutput)) {
|
||||
gatherFunction.isIntermediate = true
|
||||
// Only delete the log files if the original function is an intermediate
|
||||
// and the intermediate files are supposed to be deleted
|
||||
gatherFunction.deleteIntermediateOutputs = this.isIntermediate && this.deleteIntermediateOutputs
|
||||
} else {
|
||||
gatherFunction.originalLogFiles = logFiles
|
||||
}
|
||||
gatherFunction.jobOutputFile = new File("gather-" + gatherOutput.getName + ".out")
|
||||
gatherFunction.addOrder = this.addOrder :+ gatherAddOrder
|
||||
|
||||
initGatherFunction(gatherFunction, gatherField)
|
||||
gatherFunction.absoluteCommandDirectory()
|
||||
gatherFunction.init()
|
||||
|
||||
functions :+= gatherFunction
|
||||
gatherFunctions += gatherField -> gatherFunction
|
||||
gatherOutputs += gatherField -> gatherOutput
|
||||
|
||||
gatherAddOrder += 1
|
||||
}
|
||||
|
||||
// Create the clone functions for running the parallel jobs
|
||||
var cloneFunctions = List.empty[CloneFunction]
|
||||
var cloneFunctions = Seq.empty[CloneFunction]
|
||||
val dirFormat = "temp_%%0%dd_of_%d".format(numClones.toString.length(), numClones)
|
||||
for (i <- 1 to numClones) {
|
||||
val cloneFunction = this.newCloneFunction()
|
||||
|
||||
this.copySettingsTo(cloneFunction)
|
||||
cloneFunction.originalFunction = this
|
||||
cloneFunction.analysisName = this.analysisName
|
||||
cloneFunction.cloneIndex = i
|
||||
cloneFunction.commandDirectory = this.scatterGatherTempDir("temp-"+i)
|
||||
cloneFunction.commandDirectory = this.scatterGatherTempDir(dirFormat.format(i))
|
||||
cloneFunction.jobOutputFile = new File(this.jobOutputFile.getName)
|
||||
if (this.jobErrorFile != null)
|
||||
cloneFunction.jobErrorFile = new File(this.jobErrorFile.getName)
|
||||
cloneFunction.addOrder = this.addOrder :+ (i+1)
|
||||
cloneFunction.isIntermediate = true
|
||||
|
||||
|
|
@ -200,17 +194,39 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
|||
// If the command directory is relative, insert the run directory ahead of it.
|
||||
cloneFunction.absoluteCommandDirectory()
|
||||
|
||||
// Get absolute paths to the files and bind the sg functions to the clone function via the absolute paths.
|
||||
// Allow the scatter function to set the specific input for this clone
|
||||
scatterFunction.bindCloneInputs(cloneFunction, i)
|
||||
|
||||
// Set each of the clone outputs to be absolute paths.
|
||||
for (gatherField <- outputFieldsWithValues) {
|
||||
val gatherPart = IOUtils.absolute(cloneFunction.commandDirectory, cloneFunction.getFieldFile(gatherField))
|
||||
cloneFunction.setFieldValue(gatherField, gatherPart)
|
||||
gatherFunctions(gatherField).gatherParts :+= gatherPart
|
||||
}
|
||||
|
||||
// For the outputs that are being gathered add this clone's output to be gathered.
|
||||
for (gatherField <- outputFieldsWithGathers) {
|
||||
gatherFunctions(gatherField).gatherParts :+= cloneFunction.getFieldFile(gatherField)
|
||||
}
|
||||
|
||||
cloneFunctions :+= cloneFunction
|
||||
}
|
||||
functions ++= cloneFunctions
|
||||
|
||||
// Track the functions starting with the scatter function.
|
||||
var functions: Seq[QFunction] = Seq(scatterFunction) ++ cloneFunctions ++ gatherFunctions.values
|
||||
|
||||
// Make all log file paths absolute.
|
||||
for (function <- functions) {
|
||||
function.jobOutputFile = IOUtils.absolute(function.commandDirectory, function.jobOutputFile)
|
||||
if (function.jobErrorFile != null)
|
||||
function.jobErrorFile = IOUtils.absolute(function.commandDirectory, function.jobErrorFile)
|
||||
}
|
||||
|
||||
val jobOutputGather = gatherLogFile(_.jobOutputFile, functions, gatherAddOrder)
|
||||
if (this.jobErrorFile != null) {
|
||||
val jobErrorGather = gatherLogFile(_.jobErrorFile, functions, gatherAddOrder + 1)
|
||||
functions :+= jobErrorGather
|
||||
}
|
||||
functions :+= jobOutputGather
|
||||
|
||||
// Return all the various created functions.
|
||||
functions
|
||||
|
|
@ -237,6 +253,25 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
|||
this.setupScatterFunction(scatterFunction)
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the field should be gathered.
|
||||
* @param gatherField Field that defined @Gather.
|
||||
* @return true if the field should be gathered.
|
||||
*/
|
||||
protected def hasGatherFunction(gatherField: ArgumentSource) : Boolean = {
|
||||
// Check if there is a function that will return the gather class for this field.
|
||||
if (this.gatherClass != null && this.gatherClass.isDefinedAt(gatherField))
|
||||
true
|
||||
|
||||
// Check for an annotation defining the gather class.
|
||||
else if (ReflectionUtils.hasAnnotation(gatherField.field, classOf[Gather]))
|
||||
ReflectionUtils.getAnnotation(gatherField.field, classOf[Gather]).enabled
|
||||
|
||||
// Nothing else to disable this field.
|
||||
else
|
||||
true
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new GatherFunction for the gatherField.
|
||||
* @param gatherField Field that defined @Gather.
|
||||
|
|
@ -255,16 +290,18 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
|||
if (ReflectionUtils.hasAnnotation(gatherField.field, classOf[Gather])) {
|
||||
gatherClass = ReflectionUtils.getAnnotation(gatherField.field, classOf[Gather]).value
|
||||
} else {
|
||||
throw new QException("Missing @Gather annotation: " + gatherField.field)
|
||||
throw new QException("Missing @Gather annotation on %s".format(gatherField.field))
|
||||
}
|
||||
}
|
||||
|
||||
if (classOf[GatherFunction].isAssignableFrom(gatherClass)) {
|
||||
if (gatherClass == classOf[GatherFunction]) {
|
||||
throw new QException("@Gather did not specify class type on %s".format(gatherField.field))
|
||||
} else if (classOf[GatherFunction].isAssignableFrom(gatherClass)) {
|
||||
gatherClass.newInstance.asInstanceOf[GatherFunction]
|
||||
} else if (classOf[Gatherer].isAssignableFrom(gatherClass)) {
|
||||
new GathererFunction(gatherClass.asSubclass(classOf[Gatherer]))
|
||||
} else {
|
||||
throw new QException("Unsupported @Gather class type: " + gatherClass)
|
||||
throw new QException("Unsupported @Gather class type on %s: %s".format(gatherField.field, gatherClass))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -298,10 +335,27 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
|||
this.setupCloneFunction(cloneFunction, index)
|
||||
}
|
||||
|
||||
/**
|
||||
* Gathers up the logs files from other functions.
|
||||
* @param logFile Takes the QFunction and return the log file.
|
||||
* @param functions The functions for which the logs will be concatenated.
|
||||
* @param addOrder The order this function should be added in the graph.
|
||||
*/
|
||||
private def gatherLogFile(logFile: (QFunction) => File, functions: Seq[QFunction], addOrder: Int) = {
|
||||
val gatherLogFunction = new ConcatenateLogsFunction
|
||||
this.copySettingsTo(gatherLogFunction)
|
||||
gatherLogFunction.logs = functions.map(logFile).filter(_ != null)
|
||||
gatherLogFunction.jobOutputFile = logFile(this)
|
||||
gatherLogFunction.commandDirectory = this.scatterGatherTempDir()
|
||||
gatherLogFunction.addOrder = this.addOrder :+ addOrder
|
||||
gatherLogFunction.isIntermediate = false
|
||||
gatherLogFunction
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a temporary directory under this scatter gather directory.
|
||||
* @param Sub directory under the scatter gather directory.
|
||||
* @param subDir directory under the scatter gather directory.
|
||||
* @return temporary directory under this scatter gather directory.
|
||||
*/
|
||||
private def scatterGatherTempDir(subDir: String) = IOUtils.absolute(this.scatterGatherDirectory, this.jobName + "-sg/" + subDir)
|
||||
private def scatterGatherTempDir(subDir: String = "") = IOUtils.absolute(this.scatterGatherDirectory, this.jobName + "-sg/" + subDir)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -34,12 +34,12 @@ import scala.collection.JavaConversions._
|
|||
*/
|
||||
class EmailMessage extends Logging {
|
||||
var from: String = _
|
||||
var to: List[String] = Nil
|
||||
var cc: List[String] = Nil
|
||||
var bcc: List[String] = Nil
|
||||
var to: Seq[String] = Nil
|
||||
var cc: Seq[String] = Nil
|
||||
var bcc: Seq[String] = Nil
|
||||
var subject: String = _
|
||||
var body: String = _
|
||||
var attachments: List[File] = Nil
|
||||
var attachments: Seq[File] = Nil
|
||||
|
||||
/**
|
||||
* Sends the email and throws an exception if the email can't be sent.
|
||||
|
|
@ -111,10 +111,10 @@ class EmailMessage extends Logging {
|
|||
/**
|
||||
* Converts the email addresses to a collection of InternetAddress which can bypass client side validation,
|
||||
* specifically that the domain is specified.
|
||||
* @param addresses List of email addresses.
|
||||
* @param addresses Seq of email addresses.
|
||||
* @return java.util.List of InternetAddress'es
|
||||
*/
|
||||
private def convert(addresses: List[String]): java.util.List[InternetAddress] = {
|
||||
private def convert(addresses: Seq[String]): java.util.List[InternetAddress] = {
|
||||
addresses.map(address => new InternetAddress(address, false))
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -28,9 +28,10 @@ import org.broadinstitute.sting.queue.function.QFunction
|
|||
import org.broadinstitute.sting.gatk.report.{GATKReportTable, GATKReport}
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException
|
||||
import org.broadinstitute.sting.queue.engine.JobRunInfo
|
||||
import java.io.{FileOutputStream, PrintStream, File}
|
||||
import java.io.{PrintStream, File}
|
||||
import org.broadinstitute.sting.utils.R.{RScriptLibrary, RScriptExecutor}
|
||||
import org.broadinstitute.sting.utils.io.Resource
|
||||
import org.apache.commons.io.{IOUtils, FileUtils}
|
||||
|
||||
/**
|
||||
* A mixin to add Job info to the class
|
||||
|
|
@ -67,7 +68,7 @@ trait QJobReport extends Logging {
|
|||
def getReportGroup = self.analysisName.replaceAll(GATKReportTable.INVALID_TABLE_NAME_REGEX, "_")
|
||||
def getReportFeatures = reportFeatures
|
||||
|
||||
def getReportFeatureNames: List[String] = getReportFeatures.keys.toList
|
||||
def getReportFeatureNames: Seq[String] = getReportFeatures.keys.toSeq
|
||||
def getReportFeature(key: String): String = {
|
||||
getReportFeatures.get(key) match {
|
||||
case Some(x) => x
|
||||
|
|
@ -102,9 +103,12 @@ object QJobReport {
|
|||
def printReport(jobsRaw: Map[QFunction, JobRunInfo], dest: File) {
|
||||
val jobs = jobsRaw.filter(_._2.isFilledIn).filter(_._1.includeInReport)
|
||||
jobs foreach {case (qf, info) => qf.setRunInfo(info)}
|
||||
val stream = new PrintStream(new FileOutputStream(dest))
|
||||
printJobLogging(jobs.keys.toList, stream)
|
||||
stream.close()
|
||||
val stream = new PrintStream(FileUtils.openOutputStream(dest))
|
||||
try {
|
||||
printJobLogging(jobs.keys.toSeq, stream)
|
||||
} finally {
|
||||
IOUtils.closeQuietly(stream)
|
||||
}
|
||||
}
|
||||
|
||||
def plotReport(reportFile: File, pdfFile: File) {
|
||||
|
|
@ -129,7 +133,7 @@ object QJobReport {
|
|||
* Prints the JobLogging logs to a GATKReport. First splits up the
|
||||
* logs by group, and for each group generates a GATKReportTable
|
||||
*/
|
||||
private def printJobLogging(logs: List[QFunction], stream: PrintStream) {
|
||||
private def printJobLogging(logs: Seq[QFunction], stream: PrintStream) {
|
||||
// create the report
|
||||
val report: GATKReport = new GATKReport
|
||||
|
||||
|
|
@ -151,11 +155,11 @@ object QJobReport {
|
|||
report.print(stream)
|
||||
}
|
||||
|
||||
private def groupLogs(logs: List[QFunction]): Map[String, List[QFunction]] = {
|
||||
private def groupLogs(logs: Seq[QFunction]): Map[String, Seq[QFunction]] = {
|
||||
logs.groupBy(_.getReportGroup)
|
||||
}
|
||||
|
||||
private def logKeys(logs: List[QFunction]): Set[String] = {
|
||||
private def logKeys(logs: Seq[QFunction]): Set[String] = {
|
||||
// the keys should be the same for each log, but we will check that
|
||||
val keys = Set[String](logs(0).getReportFeatureNames : _*)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.util
|
||||
|
||||
import java.io.File
|
||||
|
|
@ -12,23 +36,22 @@ import collection.JavaConversions._
|
|||
* User: carneiro
|
||||
* Date: 7/14/11
|
||||
* Time: 4:57 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
object QScriptUtils {
|
||||
|
||||
/**
|
||||
* Takes a bam list file and produces a scala list with each file allowing the bam list
|
||||
* Takes a bam list file and produces a scala sequence with each file allowing the bam list
|
||||
* to have empty lines and comment lines (lines starting with #).
|
||||
*/
|
||||
def createListFromFile(in: File):List[File] = {
|
||||
def createSeqFromFile(in: File):Seq[File] = {
|
||||
// If the file provided ends with .bam, .fasta or .fq, it is not a bam list, we treat it as a single file.
|
||||
// and return a list with only this file.
|
||||
if (in.toString.endsWith(".bam") || in.toString.endsWith(".fasta") || in.toString.endsWith(".fq"))
|
||||
return List(in)
|
||||
return Seq(in)
|
||||
|
||||
var list: List[File] = List()
|
||||
for (file <- fromFile(in).getLines)
|
||||
var list: Seq[File] = Seq()
|
||||
for (file <- fromFile(in).getLines())
|
||||
if (!file.startsWith("#") && !file.isEmpty )
|
||||
list :+= new File(file.trim())
|
||||
list.sortWith(_.compareTo(_) < 0)
|
||||
|
|
@ -55,8 +78,4 @@ object QScriptUtils {
|
|||
}
|
||||
false
|
||||
}
|
||||
|
||||
|
||||
def ?[A <: AnyRef](ref: A): Option[A] =
|
||||
if (ref eq null) None else Some(ref)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.util
|
||||
|
||||
import org.broadinstitute.sting.queue.QException
|
||||
|
|
@ -64,17 +88,17 @@ object ReflectionUtils {
|
|||
/**
|
||||
* Returns all the declared fields on a class in order of sub type to super type.
|
||||
* @param clazz Base class to start looking for fields.
|
||||
* @return List[Field] found on the class and all super classes.
|
||||
* @return Seq[Field] found on the class and all super classes.
|
||||
*/
|
||||
def getAllFields(clazz: Class[_]) = getAllTypes(clazz).map(_.getDeclaredFields).flatMap(_.toList)
|
||||
def getAllFields(clazz: Class[_]) = getAllTypes(clazz).map(_.getDeclaredFields).flatMap(_.toSeq)
|
||||
|
||||
/**
|
||||
* Gets all the types on a class in order of sub type to super type.
|
||||
* @param clazz Base class.
|
||||
* @return List[Class] including the class and all super classes.
|
||||
* @return Seq[Class] including the class and all super classes.
|
||||
*/
|
||||
def getAllTypes(clazz: Class[_]) = {
|
||||
var types = List.empty[Class[_]]
|
||||
var types = Seq.empty[Class[_]]
|
||||
var c = clazz
|
||||
while (c != null) {
|
||||
types :+= c
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.util
|
||||
|
||||
import collection.JavaConversions._
|
||||
|
|
@ -14,32 +38,34 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
/**
|
||||
* Checks if the class type is a scala collection.
|
||||
* @param classType Class type to check.
|
||||
* @return true if the class is a List, Set, or an Option.
|
||||
* @return true if the class is a Seq, Set, or an Option.
|
||||
*/
|
||||
def supports(classType: Class[_]) = isCompound(classType)
|
||||
|
||||
/**
|
||||
* Checks if the class type is a scala collection.
|
||||
* @param source Argument source to check.
|
||||
* @return true if the source is a List, Set, or an Option.
|
||||
* @return true if the source is a Seq, Set, or an Option.
|
||||
*/
|
||||
override def isMultiValued(source: ArgumentSource) = isCompound(source.field.getType)
|
||||
|
||||
/**
|
||||
* Checks if the class type is a scala collection.
|
||||
* @param classType Class type to check.
|
||||
* @return true if the class is a List, Set, or an Option.
|
||||
* @return true if the class is a Seq, Set, or an Option.
|
||||
*/
|
||||
private def isCompound(classType: Class[_]) = {
|
||||
classOf[List[_]].isAssignableFrom(classType) ||
|
||||
classOf[Seq[_]].isAssignableFrom(classType) ||
|
||||
classOf[List[_]].isAssignableFrom(classType) || // see comment below re: List vs. Seq
|
||||
classOf[Set[_]].isAssignableFrom(classType) ||
|
||||
classOf[Option[_]].isAssignableFrom(classType)
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the argument matches based on the class type of the argument source's field.
|
||||
* @param parsingEngine Parsing engine.
|
||||
* @param source Argument source that contains the field being populated.
|
||||
* @param classType Class type being parsed.
|
||||
* @param typeType Type of the argument source's field.
|
||||
* @param argumentMatches The argument match strings that were found for this argument source.
|
||||
* @return The parsed object.
|
||||
*/
|
||||
|
|
@ -51,7 +77,15 @@ class ScalaCompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
val componentType = ReflectionUtils.getCollectionType(source.field)
|
||||
val componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType)
|
||||
|
||||
if (classOf[List[_]].isAssignableFrom(classType)) {
|
||||
if (classOf[Seq[_]].isAssignableFrom(classType)) {
|
||||
var seq = Seq.empty[Any]
|
||||
for (argumentMatch <- argumentMatches)
|
||||
for (value <- argumentMatch)
|
||||
seq :+= componentArgumentParser.parse(parsingEngine, source, componentType, new ArgumentMatches(value))
|
||||
seq
|
||||
} else if (classOf[List[_]].isAssignableFrom(classType)) {
|
||||
// QScripts should be using the interface Seq instead of the class List.
|
||||
// Leaving this here for now for legacy support until the effects of switching have been tested for a while. -ks
|
||||
var list = List.empty[Any]
|
||||
for (argumentMatch <- argumentMatches)
|
||||
for (value <- argumentMatch)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -44,7 +44,7 @@ object StringFileConversions {
|
|||
// and mixins all correct so this doesn't have to be duplicated with concrete implementations?
|
||||
// http://programming-scala.labs.oreilly.com/ch12.html is your friend.
|
||||
|
||||
implicit def stringsAsFiles(x: List[Comparable[_ >: String with File <: Comparable[_ >: String with File <: Serializable] with Serializable] with Serializable]): List[File] = {
|
||||
implicit def stringsAsFiles(x: Seq[Comparable[_ >: String with File <: Comparable[_ >: String with File <: Serializable] with Serializable] with Serializable]): Seq[File] = {
|
||||
x.map(_ match {
|
||||
case string: String => stringAsFile(string)
|
||||
case file: File => file
|
||||
|
|
@ -52,7 +52,23 @@ object StringFileConversions {
|
|||
})
|
||||
}
|
||||
|
||||
implicit def filesAsStrings(x: List[Comparable[_ >: File with String <: Comparable[_ >: File with String <: Serializable] with Serializable] with Serializable]): List[String] = {
|
||||
implicit def filesAsStrings(x: Seq[Comparable[_ >: File with String <: Comparable[_ >: File with String <: Serializable] with Serializable] with Serializable]): Seq[String] = {
|
||||
x.map(_ match {
|
||||
case file: File => fileAsString(file)
|
||||
case string: String => string
|
||||
case null => null
|
||||
})
|
||||
}
|
||||
|
||||
implicit def stringsAsFilesList(x: List[Comparable[_ >: String with File <: Comparable[_ >: String with File <: Serializable] with Serializable] with Serializable]): List[File] = {
|
||||
x.map(_ match {
|
||||
case string: String => stringAsFile(string)
|
||||
case file: File => file
|
||||
case null => null
|
||||
})
|
||||
}
|
||||
|
||||
implicit def filesAsStringsList(x: List[Comparable[_ >: File with String <: Comparable[_ >: File with String <: Serializable] with Serializable] with Serializable]): List[String] = {
|
||||
x.map(_ match {
|
||||
case file: File => fileAsString(file)
|
||||
case string: String => string
|
||||
|
|
@ -91,14 +107,22 @@ trait StringFileConversions {
|
|||
StringFileConversions.fileAsString(x)
|
||||
}
|
||||
|
||||
implicit def stringsAsFiles(x: List[Comparable[_ >: String with File <: Comparable[_ >: String with File <: Serializable] with Serializable] with Serializable]): List[File] = {
|
||||
implicit def stringsAsFiles(x: Seq[Comparable[_ >: String with File <: Comparable[_ >: String with File <: Serializable] with Serializable] with Serializable]): Seq[File] = {
|
||||
StringFileConversions.stringsAsFiles(x)
|
||||
}
|
||||
|
||||
implicit def filesAsStrings(x: List[Comparable[_ >: File with String <: Comparable[_ >: File with String <: Serializable] with Serializable] with Serializable]): List[String] = {
|
||||
implicit def filesAsStrings(x: Seq[Comparable[_ >: File with String <: Comparable[_ >: File with String <: Serializable] with Serializable] with Serializable]): Seq[String] = {
|
||||
StringFileConversions.filesAsStrings(x)
|
||||
}
|
||||
|
||||
implicit def stringsAsFilesList(x: List[Comparable[_ >: String with File <: Comparable[_ >: String with File <: Serializable] with Serializable] with Serializable]): List[File] = {
|
||||
StringFileConversions.stringsAsFilesList(x)
|
||||
}
|
||||
|
||||
implicit def filesAsStringsList(x: List[Comparable[_ >: File with String <: Comparable[_ >: File with String <: Serializable] with Serializable] with Serializable]): List[String] = {
|
||||
StringFileConversions.filesAsStringsList(x)
|
||||
}
|
||||
|
||||
implicit def stringsAsFiles(x: Set[Comparable[_ >: File with String <: Comparable[_ >: File with String <: Comparable[_ >: File with String <: Serializable] with Serializable] with Serializable] with Serializable]): Set[File] = {
|
||||
StringFileConversions.stringsAsFiles(x)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -24,7 +24,6 @@
|
|||
|
||||
package org.broadinstitute.sting.queue.util
|
||||
|
||||
import java.lang.management.ManagementFactory
|
||||
import java.net.InetAddress
|
||||
import java.io.File
|
||||
import io.Source
|
||||
|
|
@ -56,6 +55,4 @@ object SystemUtils extends Logging {
|
|||
else
|
||||
hostName.split('.').takeRight(2).mkString(".")
|
||||
}
|
||||
|
||||
val pidAtHost = ManagementFactory.getRuntimeMXBean.getName.split('.').head
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -40,9 +40,6 @@ class GATKIntervalsUnitTest {
|
|||
createSetFromSequenceDictionary(new ReferenceDataSource(hg18Reference).getReference.getSequenceDictionary).toList
|
||||
|
||||
private final lazy val hg19Reference = new File(BaseTest.hg19Reference)
|
||||
private final lazy val hg19GenomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(hg19Reference))
|
||||
private final lazy val hg19ReferenceLocs = GenomeLocSortedSet.
|
||||
createSetFromSequenceDictionary(new ReferenceDataSource(hg19Reference).getReference.getSequenceDictionary).toList
|
||||
|
||||
@Test
|
||||
def testWithIntervals() {
|
||||
|
|
@ -50,16 +47,14 @@ class GATKIntervalsUnitTest {
|
|||
val chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-3")
|
||||
val chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:3-5")
|
||||
|
||||
val gi = new GATKIntervals(hg18Reference, List("chr1:1-1", "chr2:2-3", "chr3:3-5"))
|
||||
Assert.assertEquals(gi.locs.toList, List(chr1, chr2, chr3))
|
||||
Assert.assertEquals(gi.contigs, List("chr1", "chr2", "chr3"))
|
||||
// Assert.assertEquals(gi.getSplits(2).toList, List(2, 3))
|
||||
// Assert.assertEquals(gi.getSplits(3).toList, List(1, 2, 3))
|
||||
val gi = new GATKIntervals(hg18Reference, Seq("chr1:1-1", "chr2:2-3", "chr3:3-5"))
|
||||
Assert.assertEquals(gi.locs.toSeq, Seq(chr1, chr2, chr3))
|
||||
Assert.assertEquals(gi.contigs, Seq("chr1", "chr2", "chr3"))
|
||||
}
|
||||
|
||||
@Test(timeOut = 30000)
|
||||
@Test(timeOut = 30000L)
|
||||
def testIntervalFile() {
|
||||
var gi = new GATKIntervals(hg19Reference, List(BaseTest.hg19Intervals))
|
||||
var gi = new GATKIntervals(hg19Reference, Seq(BaseTest.hg19Intervals))
|
||||
Assert.assertEquals(gi.locs.size, 189894)
|
||||
// Timeout check is because of bad:
|
||||
// for(Item item: javaConvertedScalaList)
|
||||
|
|
@ -74,14 +69,12 @@ class GATKIntervalsUnitTest {
|
|||
val gi = new GATKIntervals(hg18Reference, Nil)
|
||||
Assert.assertEquals(gi.locs, hg18ReferenceLocs)
|
||||
Assert.assertEquals(gi.contigs.size, hg18ReferenceLocs.size)
|
||||
// Assert.assertEquals(gi.getSplits(2).toList, List(10, 45))
|
||||
// Assert.assertEquals(gi.getSplits(4).toList, List(5, 10, 16, 45))
|
||||
}
|
||||
|
||||
@Test
|
||||
def testContigCounts() {
|
||||
Assert.assertEquals(new GATKIntervals(hg18Reference, Nil).contigs, hg18ReferenceLocs.map(_.getContig))
|
||||
Assert.assertEquals(new GATKIntervals(hg18Reference, List("chr1", "chr2", "chr3")).contigs, List("chr1", "chr2", "chr3"))
|
||||
Assert.assertEquals(new GATKIntervals(hg18Reference, List("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, List("chr1", "chr2", "chr3"))
|
||||
Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1", "chr2", "chr3")).contigs, Seq("chr1", "chr2", "chr3"))
|
||||
Assert.assertEquals(new GATKIntervals(hg18Reference, Seq("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2")).contigs, Seq("chr1", "chr2", "chr3"))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.function
|
||||
|
||||
import org.testng.Assert
|
||||
|
|
@ -114,20 +138,20 @@ class CommandLineFunctionUnitTest extends CommandLineFunction {
|
|||
|
||||
@DataProvider( name = "repeatTestData" )
|
||||
def repeatDataProvider = {
|
||||
Array(Array("", List("a", "bc", "d"), "", " ", true, true, " 'a' 'bc' 'd' "),
|
||||
Array("", List("a", "bc", "d"), "", " ", true, false, " a bc d "),
|
||||
Array("", List("a", "bc", "d"), "", "", true, true, " 'a''bc''d' "),
|
||||
Array("", List("a", "bc", "d"), "", "", true, false, " abcd "),
|
||||
Array("-f", List("file1", "file2", "file3"), "", " ", true, true, " '-f' 'file1' '-f' 'file2' '-f' 'file3' "),
|
||||
Array("-f", List("file1", "file2", "file3"), "", " ", true, false, " -f file1 -f file2 -f file3 "),
|
||||
Array("-f", List("file1", "file2", "file3"), "", " ", false, true, " '-ffile1' '-ffile2' '-ffile3' "),
|
||||
Array("-f", List("file1", "file2", "file3"), "", " ", false, false, " -ffile1 -ffile2 -ffile3 "),
|
||||
Array("-f", List("file1", "file2", "file3"), "", "", false, true, " '-ffile1''-ffile2''-ffile3' "),
|
||||
Array("-f", List("file1", "file2", "file3"), "", "", false, false, " -ffile1-ffile2-ffile3 "),
|
||||
Array("-f", List("file1", "file2", "file3"), "suffix", " ", true, true, " '-f' 'file1' 'suffix' '-f' 'file2' 'suffix' '-f' 'file3' 'suffix' "),
|
||||
Array("-f", List("file1", "file2", "file3"), "suffix", " ", true, false, " -f file1 suffix -f file2 suffix -f file3 suffix "),
|
||||
Array("-f", List("file1", "file2", "file3"), "suffix", " ", false, true, " '-ffile1suffix' '-ffile2suffix' '-ffile3suffix' "),
|
||||
Array("-f", List("file1", "file2", "file3"), "suffix", " ", false, false, " -ffile1suffix -ffile2suffix -ffile3suffix "),
|
||||
Array(Array("", Seq("a", "bc", "d"), "", " ", true, true, " 'a' 'bc' 'd' "),
|
||||
Array("", Seq("a", "bc", "d"), "", " ", true, false, " a bc d "),
|
||||
Array("", Seq("a", "bc", "d"), "", "", true, true, " 'a''bc''d' "),
|
||||
Array("", Seq("a", "bc", "d"), "", "", true, false, " abcd "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "", " ", true, true, " '-f' 'file1' '-f' 'file2' '-f' 'file3' "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "", " ", true, false, " -f file1 -f file2 -f file3 "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "", " ", false, true, " '-ffile1' '-ffile2' '-ffile3' "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "", " ", false, false, " -ffile1 -ffile2 -ffile3 "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "", "", false, true, " '-ffile1''-ffile2''-ffile3' "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "", "", false, false, " -ffile1-ffile2-ffile3 "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "suffix", " ", true, true, " '-f' 'file1' 'suffix' '-f' 'file2' 'suffix' '-f' 'file3' 'suffix' "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "suffix", " ", true, false, " -f file1 suffix -f file2 suffix -f file3 suffix "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "suffix", " ", false, true, " '-ffile1suffix' '-ffile2suffix' '-ffile3suffix' "),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "suffix", " ", false, false, " -ffile1suffix -ffile2suffix -ffile3suffix "),
|
||||
Array("-f", null, "", " ", true, true, ""),
|
||||
Array("-f", Nil, "", " ", true, true, "")
|
||||
)
|
||||
|
|
@ -148,11 +172,11 @@ class CommandLineFunctionUnitTest extends CommandLineFunction {
|
|||
|
||||
@DataProvider( name = "repeatWithPrefixFormattingTestData" )
|
||||
def repeatWithPrefixFormattingDataProvider = {
|
||||
Array(Array("-f", List("file1", "file2", "file3"), "", " ", true, true, (prefix: String, value: Any) => "%s:tag%s".format(prefix, value),
|
||||
Array(Array("-f", Seq("file1", "file2", "file3"), "", " ", true, true, (prefix: String, value: Any) => "%s:tag%s".format(prefix, value),
|
||||
" '-f:tagfile1' 'file1' '-f:tagfile2' 'file2' '-f:tagfile3' 'file3' "),
|
||||
Array("-f", List("file1", "file2", "file3"), "", " ", true, false, (prefix: String, value: Any) => "%s:tag%s".format(prefix, value),
|
||||
Array("-f", Seq("file1", "file2", "file3"), "", " ", true, false, (prefix: String, value: Any) => "%s:tag%s".format(prefix, value),
|
||||
" -f:tagfile1 file1 -f:tagfile2 file2 -f:tagfile3 file3 "),
|
||||
Array("", List("file1", "file2", "file3"), "", " ", true, true, (prefix: String, value: Any) => "-%s".format(value),
|
||||
Array("", Seq("file1", "file2", "file3"), "", " ", true, true, (prefix: String, value: Any) => "-%s".format(value),
|
||||
" '-file1' 'file1' '-file2' 'file2' '-file3' 'file3' "),
|
||||
Array("-f", null, "", " ", true, true, (prefix: String, value: Any) => "%s:tag%s".format(prefix, value),
|
||||
""),
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -47,10 +47,10 @@ object PipelineTest extends BaseTest with Logging {
|
|||
|
||||
final val allJobRunners = {
|
||||
val commandLinePluginManager = new CommandLinePluginManager
|
||||
commandLinePluginManager.getPlugins.map(commandLinePluginManager.getName(_)).toList
|
||||
commandLinePluginManager.getPlugins.map(commandLinePluginManager.getName(_)).toSeq
|
||||
}
|
||||
|
||||
final val defaultJobRunners = List("Lsf706", "GridEngine")
|
||||
final val defaultJobRunners = Seq("Lsf706", "GridEngine")
|
||||
|
||||
/**
|
||||
* Returns the top level output path to this test.
|
||||
|
|
|
|||
|
|
@ -1,14 +1,38 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.pipeline
|
||||
|
||||
/**
|
||||
* Data validations to evaluate on a GATKReport.
|
||||
*/
|
||||
class PipelineTestEvalSpec {
|
||||
/** List of eval modules to output. */
|
||||
/** Eval modules to output. */
|
||||
var evalReport: String = _
|
||||
|
||||
/** Validations to assert. */
|
||||
var validations: List[PipelineValidation[_]] = Nil
|
||||
var validations: Seq[PipelineValidation[_]] = Nil
|
||||
}
|
||||
|
||||
/** A VariantEval JEXL and range of values to validate. */
|
||||
|
|
|
|||
|
|
@ -1,3 +1,27 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.pipeline
|
||||
|
||||
class PipelineTestSpec(var name: String = null) {
|
||||
|
|
@ -9,7 +33,7 @@ class PipelineTestSpec(var name: String = null) {
|
|||
var jobQueue: String = _
|
||||
|
||||
/** Job runners to run the test. Default is null which means use the default. */
|
||||
var jobRunners: List[String] = _
|
||||
var jobRunners: Seq[String] = _
|
||||
|
||||
/** Expected MD5 results for each file path. */
|
||||
var fileMD5s = Map.empty[String, String]
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue