diff --git a/build.xml b/build.xml
index 7c81c1f20..dbdafa3d9 100644
--- a/build.xml
+++ b/build.xml
@@ -1,5 +1,5 @@
-
+
+
+
+
+
+
+
+
diff --git a/ivy.xml b/ivy.xml
index 4f41904ba..f5ff15c30 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -1,3 +1,26 @@
+
@@ -21,7 +44,6 @@
-
@@ -40,7 +62,7 @@
-
+
diff --git a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R
index d5ee3626f..ae340e688 100644
--- a/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R
+++ b/public/R/scripts/org/broadinstitute/sting/queue/util/queueJobReport.R
@@ -1,6 +1,7 @@
library(gsalib)
-require("ggplot2")
-require("gplots")
+library(ggplot2)
+library(gplots)
+library(tools)
#
# Standard command line switch. Can we loaded interactively for development
@@ -201,4 +202,7 @@ for ( group in gatkReportData ) {
if ( ! is.na(outputPDF) ) {
dev.off()
-}
+ if (exists("compactPDF")) {
+ compactPDF(outputPDF)
+ }
+}
diff --git a/public/java/src/net/sf/picard/sam/MergingSamRecordIterator.java b/public/java/src/net/sf/picard/sam/MergingSamRecordIterator.java
deleted file mode 100644
index 4b1c7a999..000000000
--- a/public/java/src/net/sf/picard/sam/MergingSamRecordIterator.java
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-package net.sf.picard.sam;
-
-import net.sf.picard.PicardException;
-
-import java.util.*;
-import java.lang.reflect.Constructor;
-
-import net.sf.samtools.*;
-import net.sf.samtools.util.CloseableIterator;
-
-/**
- * Provides an iterator interface for merging multiple underlying iterators into a single
- * iterable stream. The underlying iterators/files must all have the same sort order unless
- * the requested output format is unsorted, in which case any combination is valid.
- */
-public class MergingSamRecordIterator implements CloseableIterator {
- private final PriorityQueue pq;
- private final SamFileHeaderMerger samHeaderMerger;
- private final Collection readers;
- private final SAMFileHeader.SortOrder sortOrder;
- private final SAMRecordComparator comparator;
-
- private boolean initialized = false;
- private boolean iterationStarted = false;
-
- /**
- * Constructs a new merging iterator with the same set of readers and sort order as
- * provided by the header merger parameter.
- * @param headerMerger The merged header and contents of readers.
- * @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order.
- * @deprecated replaced by (SamFileHeaderMerger, Collection, boolean)
- */
- public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) {
- this(headerMerger, headerMerger.getReaders(), forcePresorted);
- }
-
- /**
- * Constructs a new merging iterator with the same set of readers and sort order as
- * provided by the header merger parameter.
- * @param headerMerger The merged header and contents of readers.
- * @param assumeSorted false ensures that the iterator checks the headers of the readers for appropriate sort order.
- */
- public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, Collection readers, final boolean assumeSorted) {
- this.samHeaderMerger = headerMerger;
- this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
- this.comparator = getComparator();
- this.readers = readers;
-
- this.pq = new PriorityQueue(readers.size());
-
- for (final SAMFileReader reader : readers) {
- if (!assumeSorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted &&
- reader.getFileHeader().getSortOrder() != this.sortOrder){
- throw new PicardException("Files are not compatible with sort order");
- }
- }
- }
-
- /**
- * Add a given SAM file iterator to the merging iterator. Use this to restrict the merged iteration to a given genomic interval,
- * rather than iterating over every read in the backing file or stream.
- * @param reader Reader to add to the merging iterator.
- * @param iterator Iterator traversing over reader contents.
- */
- public void addIterator(final SAMFileReader reader, final CloseableIterator iterator) {
- if(iterationStarted)
- throw new PicardException("Cannot add another iterator; iteration has already begun");
- if(!samHeaderMerger.containsHeader(reader.getFileHeader()))
- throw new PicardException("All iterators to be merged must be accounted for in the SAM header merger");
- final ComparableSamRecordIterator comparableIterator = new ComparableSamRecordIterator(reader,iterator,comparator);
- addIfNotEmpty(comparableIterator);
- initialized = true;
- }
-
- private void startIterationIfRequired() {
- if(initialized)
- return;
- for(SAMFileReader reader: readers)
- addIterator(reader,reader.iterator());
- iterationStarted = true;
- }
-
- /**
- * Close down all open iterators.
- */
- public void close() {
- // Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue.
- for(CloseableIterator iterator: pq)
- iterator.close();
- }
-
- /** Returns true if any of the underlying iterators has more records, otherwise false. */
- public boolean hasNext() {
- startIterationIfRequired();
- return !this.pq.isEmpty();
- }
-
- /** Returns the next record from the top most iterator during merging. */
- public SAMRecord next() {
- startIterationIfRequired();
-
- final ComparableSamRecordIterator iterator = this.pq.poll();
- final SAMRecord record = iterator.next();
- addIfNotEmpty(iterator);
- record.setHeader(this.samHeaderMerger.getMergedHeader());
-
- // Fix the read group if needs be
- if (this.samHeaderMerger.hasReadGroupCollisions()) {
- final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID);
- if (oldGroupId != null ) {
- final String newGroupId = this.samHeaderMerger.getReadGroupId(iterator.getReader().getFileHeader(),oldGroupId);
- record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId);
- }
- }
-
- // Fix the program group if needs be
- if (this.samHeaderMerger.hasProgramGroupCollisions()) {
- final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID);
- if (oldGroupId != null ) {
- final String newGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader().getFileHeader(),oldGroupId);
- record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId);
- }
- }
-
- // Fix up the sequence indexes if needs be
- if (this.samHeaderMerger.hasMergedSequenceDictionary()) {
- if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getReferenceIndex()));
- }
-
- if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getMateReferenceIndex()));
- }
- }
-
- return record;
- }
-
- /**
- * Adds iterator to priority queue. If the iterator has more records it is added
- * otherwise it is closed and not added.
- */
- private void addIfNotEmpty(final ComparableSamRecordIterator iterator) {
- if (iterator.hasNext()) {
- pq.offer(iterator);
- }
- else {
- iterator.close();
- }
- }
-
- /** Unsupported operation. */
- public void remove() {
- throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()");
- }
-
- /**
- * Get the right comparator for a given sort order (coordinate, alphabetic). In the
- * case of "unsorted" it will return a comparator that gives an arbitrary but reflexive
- * ordering.
- */
- private SAMRecordComparator getComparator() {
- // For unsorted build a fake comparator that compares based on object ID
- if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
- return new SAMRecordComparator() {
- public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
- return System.identityHashCode(lhs) - System.identityHashCode(rhs);
- }
-
- public int compare(final SAMRecord lhs, final SAMRecord rhs) {
- return fileOrderCompare(lhs, rhs);
- }
- };
- }
- if (samHeaderMerger.hasMergedSequenceDictionary() && sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) {
- return new MergedSequenceDictionaryCoordinateOrderComparator();
- }
-
- // Otherwise try and figure out what kind of comparator to return and build it
- return this.sortOrder.getComparatorInstance();
- }
-
- /** Returns the merged header that the merging iterator is working from. */
- public SAMFileHeader getMergedHeader() {
- return this.samHeaderMerger.getMergedHeader();
- }
-
- /**
- * Ugh. Basically does a regular coordinate compare, but looks up the sequence indices in the merged
- * sequence dictionary. I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids
- * more copy & paste.
- */
- private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator {
-
- public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
- final int referenceIndex1 = getReferenceIndex(samRecord1);
- final int referenceIndex2 = getReferenceIndex(samRecord2);
- if (referenceIndex1 != referenceIndex2) {
- if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- return 1;
- } else if (referenceIndex2 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- return -1;
- } else {
- return referenceIndex1 - referenceIndex2;
- }
- }
- if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- // Both are unmapped.
- return 0;
- }
- return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart();
- }
-
- private int getReferenceIndex(final SAMRecord samRecord) {
- if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getReferenceIndex());
- }
- if (samRecord.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getMateReferenceIndex());
- }
- return SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
- }
- }
-}
diff --git a/public/java/src/net/sf/picard/sam/SamFileHeaderMerger.java b/public/java/src/net/sf/picard/sam/SamFileHeaderMerger.java
deleted file mode 100644
index f78cd81da..000000000
--- a/public/java/src/net/sf/picard/sam/SamFileHeaderMerger.java
+++ /dev/null
@@ -1,744 +0,0 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package net.sf.picard.sam;
-
-import java.util.*;
-
-import net.sf.picard.PicardException;
-import net.sf.samtools.AbstractSAMHeaderRecord;
-import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.SAMFileReader;
-import net.sf.samtools.SAMProgramRecord;
-import net.sf.samtools.SAMReadGroupRecord;
-import net.sf.samtools.SAMSequenceDictionary;
-import net.sf.samtools.SAMSequenceRecord;
-import net.sf.samtools.util.SequenceUtil;
-
-/**
- * Merges SAMFileHeaders that have the same sequences into a single merged header
- * object while providing read group translation for cases where read groups
- * clash across input headers.
- */
-public class SamFileHeaderMerger {
- //Super Header to construct
- private final SAMFileHeader mergedHeader;
- private Collection readers;
- private final Collection headers;
-
- //Translation of old group ids to new group ids
- private final Map> samReadGroupIdTranslation =
- new IdentityHashMap>();
-
- //the read groups from different files use the same group ids
- private boolean hasReadGroupCollisions = false;
-
- //the program records from different files use the same program record ids
- private boolean hasProgramGroupCollisions = false;
-
- //Translation of old program group ids to new program group ids
- private Map> samProgramGroupIdTranslation =
- new IdentityHashMap>();
-
- private boolean hasMergedSequenceDictionary = false;
-
- // Translation of old sequence dictionary ids to new dictionary ids
- // This is an IdentityHashMap because it can be quite expensive to compute the hashCode for
- // large SAMFileHeaders. It is possible that two input files will have identical headers so that
- // the regular HashMap would fold them together, but the value stored in each of the two
- // Map entries will be the same, so it should not hurt anything.
- private final Map> samSeqDictionaryIdTranslationViaHeader =
- new IdentityHashMap>();
-
- //HeaderRecordFactory that creates SAMReadGroupRecord instances.
- private static final HeaderRecordFactory READ_GROUP_RECORD_FACTORY = new HeaderRecordFactory() {
- public SAMReadGroupRecord createRecord(String id, SAMReadGroupRecord srcReadGroupRecord) {
- return new SAMReadGroupRecord(id, srcReadGroupRecord);
- }
- };
-
- //HeaderRecordFactory that creates SAMProgramRecord instances.
- private static final HeaderRecordFactory PROGRAM_RECORD_FACTORY = new HeaderRecordFactory() {
- public SAMProgramRecord createRecord(String id, SAMProgramRecord srcProgramRecord) {
- return new SAMProgramRecord(id, srcProgramRecord);
- }
- };
-
- //comparator used to sort lists of program group and read group records
- private static final Comparator RECORD_ID_COMPARATOR = new Comparator() {
- public int compare(AbstractSAMHeaderRecord o1, AbstractSAMHeaderRecord o2) {
- return o1.getId().compareTo(o2.getId());
- }
- };
-
- /**
- * Create SAMFileHeader with additional information. Required that sequence dictionaries agree.
- *
- * @param readers sam file readers to combine
- * @param sortOrder sort order new header should have
- * @deprecated replaced by SamFileHeaderMerger(Collection, SAMFileHeader.SortOrder, boolean)
- */
- public SamFileHeaderMerger(final Collection readers, final SAMFileHeader.SortOrder sortOrder) {
- this(readers, sortOrder, false);
- }
-
- /**
- * Create SAMFileHeader with additional information.
- *
- * @param readers sam file readers to combine
- * @param sortOrder sort order new header should have
- * @param mergeDictionaries If true, merge sequence dictionaries in new header. If false, require that
- * all input sequence dictionaries be identical.
- * @deprecated replaced by SamFileHeaderMerger(Collection, SAMFileHeader.SortOrder, boolean)
- */
- public SamFileHeaderMerger(final Collection readers, final SAMFileHeader.SortOrder sortOrder, final boolean mergeDictionaries) {
- this(sortOrder, getHeadersFromReaders(readers), mergeDictionaries);
- this.readers = readers;
- }
-
- /**
- * Create SAMFileHeader with additional information.. This is the preferred constructor.
- *
- * @param sortOrder sort order new header should have
- * @param headers sam file headers to combine
- * @param mergeDictionaries If true, merge sequence dictionaries in new header. If false, require that
- * all input sequence dictionaries be identical.
- */
- public SamFileHeaderMerger(final SAMFileHeader.SortOrder sortOrder, final Collection headers, final boolean mergeDictionaries) {
- this.headers = headers;
- this.mergedHeader = new SAMFileHeader();
-
- SAMSequenceDictionary sequenceDictionary;
- try {
- sequenceDictionary = getSequenceDictionary(headers);
- this.hasMergedSequenceDictionary = false;
- }
- catch (SequenceUtil.SequenceListsDifferException pe) {
- if (mergeDictionaries) {
- sequenceDictionary = mergeSequenceDictionaries(headers);
- this.hasMergedSequenceDictionary = true;
- }
- else {
- throw pe;
- }
- }
-
- this.mergedHeader.setSequenceDictionary(sequenceDictionary);
-
- // Set program that creates input alignments
- for (final SAMProgramRecord program : mergeProgramGroups(headers)) {
- this.mergedHeader.addProgramRecord(program);
- }
-
- // Set read groups for merged header
- final List readGroups = mergeReadGroups(headers);
- this.mergedHeader.setReadGroups(readGroups);
- this.mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.none);
-
- this.mergedHeader.setSortOrder(sortOrder);
-
- for (final SAMFileHeader header : headers) {
- for (final String comment : header.getComments()) {
- this.mergedHeader.addComment(comment);
- }
- }
- }
-
- // Utilility method to make use with old constructor
- private static List getHeadersFromReaders(Collection readers) {
- List headers = new ArrayList(readers.size());
- for (SAMFileReader reader : readers) {
- headers.add(reader.getFileHeader());
- }
- return headers;
- }
-
-
- /**
- * Checks to see if there are clashes where different readers are using the same read
- * group IDs. If yes, then those IDs that collided are remapped.
- *
- * @param headers headers to combine
- * @return new list of read groups constructed from all the readers
- */
- private List mergeReadGroups(final Collection headers) {
- //prepare args for mergeHeaderRecords(..) call
- final HashSet idsThatAreAlreadyTaken = new HashSet();
-
- final List> readGroupsToProcess = new LinkedList>();
- for (final SAMFileHeader header : headers) {
- for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
- //verify that there are no existing id collisions in this input file
- if(!idsThatAreAlreadyTaken.add(readGroup.getId()))
- throw new PicardException("Input file: " + header + " contains more than one RG with the same id (" + readGroup.getId() + ")");
-
- readGroupsToProcess.add(new HeaderRecordAndFileHeader(readGroup, header));
- }
- idsThatAreAlreadyTaken.clear();
- }
-
- final List result = new LinkedList();
-
- hasReadGroupCollisions = mergeHeaderRecords(readGroupsToProcess, READ_GROUP_RECORD_FACTORY, idsThatAreAlreadyTaken, samReadGroupIdTranslation, result);
-
- //sort the result list by record id
- Collections.sort(result, RECORD_ID_COMPARATOR);
-
- return result;
- }
-
-
- /**
- * Checks to see if there are clashes where different readers are using the same program
- * group IDs. If yes, then those IDs that collided are remapped.
- *
- * @param headers headers to combine
- * @return new list of program groups constructed from all the readers
- */
- private List mergeProgramGroups(final Collection headers) {
-
- final List overallResult = new LinkedList();
-
- //this Set will accumulate all SAMProgramRecord ids that have been encountered so far.
- final HashSet idsThatAreAlreadyTaken = new HashSet();
-
- //need to process all program groups
- List> programGroupsLeftToProcess = new LinkedList>();
- for (final SAMFileHeader header : headers) {
- for (final SAMProgramRecord programGroup : header.getProgramRecords()) {
- //verify that there are no existing id collisions in this input file
- if(!idsThatAreAlreadyTaken.add(programGroup.getId()))
- throw new PicardException("Input file: " + header + " contains more than one PG with the same id (" + programGroup.getId() + ")");
-
- programGroupsLeftToProcess.add(new HeaderRecordAndFileHeader(programGroup, header));
- }
- idsThatAreAlreadyTaken.clear();
- }
-
- //A program group header (lets say ID=2 PN=B PP=1) may have a PP (previous program) attribute which chains it to
- //another program group header (lets say ID=1 PN=A) to indicate that the given file was
- //processed by program A followed by program B. These PP attributes potentially
- //connect headers into one or more tree structures. Merging is done by
- //first merging all headers that don't have PP attributes (eg. tree roots),
- //then updating and merging all headers whose PPs point to the tree-root headers,
- //and so on until all program group headers are processed.
-
- //currentProgramGroups is the list of records to merge next. Start by merging the programGroups that don't have a PP attribute (eg. the tree roots).
- List< HeaderRecordAndFileHeader > currentProgramGroups = new LinkedList>();
- for(final Iterator> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
- final HeaderRecordAndFileHeader pair = programGroupsLeftToProcessIterator.next();
- if(pair.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG) == null) {
- programGroupsLeftToProcessIterator.remove();
- currentProgramGroups.add(pair);
- }
- }
-
- //merge currentProgramGroups
- while(!currentProgramGroups.isEmpty())
- {
- final List currentResult = new LinkedList();
-
- hasProgramGroupCollisions |= mergeHeaderRecords(currentProgramGroups, PROGRAM_RECORD_FACTORY, idsThatAreAlreadyTaken, samProgramGroupIdTranslation, currentResult);
-
- //add currentResults to overallResults
- overallResult.addAll(currentResult);
-
- //apply the newly-computed id translations to currentProgramGroups and programGroupsLeftToProcess
- currentProgramGroups = translateIds(currentProgramGroups, samProgramGroupIdTranslation, false);
- programGroupsLeftToProcess = translateIds(programGroupsLeftToProcess, samProgramGroupIdTranslation, true);
-
- //find all records in programGroupsLeftToProcess whose ppId points to a record that was just processed (eg. a record that's in currentProgramGroups),
- //and move them to the list of programGroupsToProcessNext.
- LinkedList> programGroupsToProcessNext = new LinkedList>();
- for(final Iterator> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
- final HeaderRecordAndFileHeader pairLeftToProcess = programGroupsLeftToProcessIterator.next();
- final Object ppIdOfRecordLeftToProcess = pairLeftToProcess.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
- //find what currentProgramGroups this ppId points to (NOTE: they have to come from the same file)
- for(final HeaderRecordAndFileHeader justProcessedPair : currentProgramGroups) {
- String idJustProcessed = justProcessedPair.getHeaderRecord().getId();
- if(pairLeftToProcess.getFileHeader() == justProcessedPair.getFileHeader() && ppIdOfRecordLeftToProcess.equals(idJustProcessed)) {
- programGroupsLeftToProcessIterator.remove();
- programGroupsToProcessNext.add(pairLeftToProcess);
- break;
- }
- }
- }
-
- currentProgramGroups = programGroupsToProcessNext;
- }
-
- //verify that all records were processed
- if(!programGroupsLeftToProcess.isEmpty()) {
- StringBuffer errorMsg = new StringBuffer(programGroupsLeftToProcess.size() + " program groups weren't processed. Do their PP ids point to existing PGs? \n");
- for( final HeaderRecordAndFileHeader pair : programGroupsLeftToProcess ) {
- SAMProgramRecord record = pair.getHeaderRecord();
- errorMsg.append("@PG ID:"+record.getProgramGroupId()+" PN:"+record.getProgramName()+" PP:"+record.getPreviousProgramGroupId() +"\n");
- }
- throw new PicardException(errorMsg.toString());
- }
-
- //sort the result list by record id
- Collections.sort(overallResult, RECORD_ID_COMPARATOR);
-
- return overallResult;
- }
-
-
- /**
- * Utility method that takes a list of program groups and remaps all their
- * ids (including ppIds if requested) using the given idTranslationTable.
- *
- * NOTE: when remapping, this method creates new SAMProgramRecords and
- * doesn't mutate any records in the programGroups list.
- *
- * @param programGroups The program groups to translate.
- * @param idTranslationTable The translation table.
- * @param translatePpIds Whether ppIds should be translated as well.
- *
- * @return The list of translated records.
- */
- private List> translateIds(
- List> programGroups,
- Map> idTranslationTable,
- boolean translatePpIds) {
-
- //go through programGroups and translate any IDs and PPs based on the idTranslationTable.
- List> result = new LinkedList>();
- for(final HeaderRecordAndFileHeader pair : programGroups ) {
- final SAMProgramRecord record = pair.getHeaderRecord();
- final String id = record.getProgramGroupId();
- final String ppId = (String) record.getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
-
- final SAMFileHeader header = pair.getFileHeader();
- final Map translations = idTranslationTable.get(header);
-
- //see if one or both ids need to be translated
- SAMProgramRecord translatedRecord = null;
- if(translations != null)
- {
- String translatedId = translations.get( id );
- String translatedPpId = translatePpIds ? translations.get( ppId ) : null;
-
- boolean needToTranslateId = translatedId != null && !translatedId.equals(id);
- boolean needToTranslatePpId = translatedPpId != null && !translatedPpId.equals(ppId);
-
- if(needToTranslateId && needToTranslatePpId) {
- translatedRecord = new SAMProgramRecord(translatedId, record);
- translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
- } else if(needToTranslateId) {
- translatedRecord = new SAMProgramRecord(translatedId, record);
- } else if(needToTranslatePpId) {
- translatedRecord = new SAMProgramRecord(id, record);
- translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
- }
- }
-
- if(translatedRecord != null) {
- result.add(new HeaderRecordAndFileHeader(translatedRecord, header));
- } else {
- result.add(pair); //keep the original record
- }
- }
-
- return result;
- }
-
-
- /**
- * Utility method for merging a List of AbstractSAMHeaderRecords. If it finds
- * records that have identical ids and attributes, it will collapse them
- * into one record. If it finds records that have identical ids but
- * non-identical attributes, this is treated as a collision. When collision happens,
- * the records' ids are remapped, and an old-id to new-id mapping is added to the idTranslationTable.
- *
- * NOTE: Non-collided records also get recorded in the idTranslationTable as
- * old-id to old-id. This way, an idTranslationTable lookup should never return null.
- *
- * @param headerRecords The header records to merge.
- * @param headerRecordFactory Constructs a specific subclass of AbstractSAMHeaderRecord.
- * @param idsThatAreAlreadyTaken If the id of a headerRecord matches an id in this set, it will be treated as a collision, and the headRecord's id will be remapped.
- * @param idTranslationTable When records collide, their ids are remapped, and an old-id to new-id
- * mapping is added to the idTranslationTable. Non-collided records also get recorded in the idTranslationTable as
- * old-id to old-id. This way, an idTranslationTable lookup should never return null.
- *
- * @param result The list of merged header records.
- *
- * @return True if there were collisions.
- */
- private boolean mergeHeaderRecords(final List> headerRecords, HeaderRecordFactory headerRecordFactory,
- final HashSet idsThatAreAlreadyTaken, Map> idTranslationTable, List result) {
-
- //The outer Map bins the header records by their ids. The nested Map further collapses
- //header records which, in addition to having the same id, also have identical attributes.
- //In other words, each key in the nested map represents one or more
- //header records which have both identical ids and identical attributes. The List of
- //SAMFileHeaders keeps track of which readers these header record(s) came from.
- final Map>> idToRecord =
- new HashMap>>();
-
- //Populate the idToRecord and seenIds data structures
- for (final HeaderRecordAndFileHeader pair : headerRecords) {
- final RecordType record = pair.getHeaderRecord();
- final SAMFileHeader header = pair.getFileHeader();
- final String recordId = record.getId();
- Map> recordsWithSameId = idToRecord.get(recordId);
- if(recordsWithSameId == null) {
- recordsWithSameId = new LinkedHashMap>();
- idToRecord.put(recordId, recordsWithSameId);
- }
-
- List fileHeaders = recordsWithSameId.get(record);
- if(fileHeaders == null) {
- fileHeaders = new LinkedList();
- recordsWithSameId.put(record, fileHeaders);
- }
-
- fileHeaders.add(header);
- }
-
- //Resolve any collisions between header records by remapping their ids.
- boolean hasCollisions = false;
- for (final Map.Entry>> entry : idToRecord.entrySet() )
- {
- final String recordId = entry.getKey();
- final Map> recordsWithSameId = entry.getValue();
-
-
- for( Map.Entry> recordWithUniqueAttr : recordsWithSameId.entrySet()) {
- final RecordType record = recordWithUniqueAttr.getKey();
- final List fileHeaders = recordWithUniqueAttr.getValue();
-
- String newId;
- if(!idsThatAreAlreadyTaken.contains(recordId)) {
- //don't remap 1st record. If there are more records
- //with this id, they will be remapped in the 'else'.
- newId = recordId;
- idsThatAreAlreadyTaken.add(recordId);
- } else {
- //there is more than one record with this id.
- hasCollisions = true;
-
- //find a unique newId for this record
- int idx=1;
- while(idsThatAreAlreadyTaken.contains(newId = recordId + "." + Integer.toString(idx++)))
- ;
-
- idsThatAreAlreadyTaken.add( newId );
- }
-
- for(SAMFileHeader fileHeader : fileHeaders) {
- Map readerTranslationTable = idTranslationTable.get(fileHeader);
- if(readerTranslationTable == null) {
- readerTranslationTable = new HashMap();
- idTranslationTable.put(fileHeader, readerTranslationTable);
- }
- readerTranslationTable.put(recordId, newId);
- }
-
- result.add( headerRecordFactory.createRecord(newId, record) );
- }
- }
-
- return hasCollisions;
- }
-
-
- /**
- * Get the sequences off the SAMFileHeader. Throws runtime exception if the sequence
- * are different from one another.
- *
- * @param headers headers to pull sequences from
- * @return sequences from files. Each file should have the same sequence
- */
- private SAMSequenceDictionary getSequenceDictionary(final Collection headers) {
- SAMSequenceDictionary sequences = null;
- for (final SAMFileHeader header : headers) {
-
- if (sequences == null) {
- sequences = header.getSequenceDictionary();
- }
- else {
- final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
- SequenceUtil.assertSequenceDictionariesEqual(sequences, currentSequences);
- }
- }
-
- return sequences;
- }
-
- /**
- * Get the sequences from the SAMFileHeader, and merge the resulting sequence dictionaries.
- *
- * @param headers headers to pull sequences from
- * @return sequences from files. Each file should have the same sequence
- */
- private SAMSequenceDictionary mergeSequenceDictionaries(final Collection headers) {
- SAMSequenceDictionary sequences = new SAMSequenceDictionary();
- for (final SAMFileHeader header : headers) {
- final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
- sequences = mergeSequences(sequences, currentSequences);
- }
- // second pass, make a map of the original seqeunce id -> new sequence id
- createSequenceMapping(headers, sequences);
- return sequences;
- }
-
- /**
- * They've asked to merge the sequence headers. What we support right now is finding the sequence name superset.
- *
- * @param mergeIntoDict the result of merging so far. All SAMSequenceRecords in here have been cloned from the originals.
- * @param mergeFromDict A new sequence dictionary to merge into mergeIntoDict.
- * @return A new sequence dictionary that resulting from merging the two inputs.
- */
- private SAMSequenceDictionary mergeSequences(SAMSequenceDictionary mergeIntoDict, SAMSequenceDictionary mergeFromDict) {
-
- // a place to hold the sequences that we haven't found a home for, in the order the appear in mergeFromDict.
- LinkedList holder = new LinkedList();
-
- // Return value will be created from this.
- LinkedList resultingDict = new LinkedList();
- for (final SAMSequenceRecord sequenceRecord : mergeIntoDict.getSequences()) {
- resultingDict.add(sequenceRecord);
- }
-
- // Index into resultingDict of previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
- int prevloc = -1;
- // Previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
- SAMSequenceRecord previouslyMerged = null;
-
- for (SAMSequenceRecord sequenceRecord : mergeFromDict.getSequences()) {
- // Does it already exist in resultingDict?
- int loc = getIndexOfSequenceName(resultingDict, sequenceRecord.getSequenceName());
- if (loc == -1) {
- // If doesn't already exist in resultingDict, save it an decide where to insert it later.
- holder.add(sequenceRecord.clone());
- } else if (prevloc > loc) {
- // If sequenceRecord already exists in resultingDict, but prior to the previous one
- // from mergeIntoDict that already existed, cannot merge.
- throw new PicardException("Cannot merge sequence dictionaries because sequence " +
- sequenceRecord.getSequenceName() + " and " + previouslyMerged.getSequenceName() +
- " are in different orders in two input sequence dictionaries.");
- } else {
- // Since sequenceRecord already exists in resultingDict, don't need to add it.
- // Add in all the sequences prior to it that have been held in holder.
- resultingDict.addAll(loc, holder);
- // Remember the index of sequenceRecord so can check for merge imcompatibility.
- prevloc = loc + holder.size();
- previouslyMerged = sequenceRecord;
- holder.clear();
- }
- }
- // Append anything left in holder.
- if (holder.size() != 0) {
- resultingDict.addAll(holder);
- }
- return new SAMSequenceDictionary(resultingDict);
- }
-
- /**
- * Find sequence in list.
- * @param list List to search for the sequence name.
- * @param sequenceName Name to search for.
- * @return Index of SAMSequenceRecord with the given name in list, or -1 if not found.
- */
- private static int getIndexOfSequenceName(final List list, final String sequenceName) {
- for (int i = 0; i < list.size(); ++i) {
- if (list.get(i).getSequenceName().equals(sequenceName)) {
- return i;
- }
- }
- return -1;
- }
-
- /**
- * create the sequence mapping. This map is used to convert the unmerged header sequence ID's to the merged
- * list of sequence id's.
- * @param headers the collections of headers.
- * @param masterDictionary the superset dictionary we've created.
- */
- private void createSequenceMapping(final Collection headers, SAMSequenceDictionary masterDictionary) {
- LinkedList resultingDictStr = new LinkedList();
- for (SAMSequenceRecord r : masterDictionary.getSequences()) {
- resultingDictStr.add(r.getSequenceName());
- }
- for (final SAMFileHeader header : headers) {
- Map seqMap = new HashMap();
- SAMSequenceDictionary dict = header.getSequenceDictionary();
- for (SAMSequenceRecord rec : dict.getSequences()) {
- seqMap.put(rec.getSequenceIndex(), resultingDictStr.indexOf(rec.getSequenceName()));
- }
- this.samSeqDictionaryIdTranslationViaHeader.put(header, seqMap);
- }
- }
-
-
-
- /**
- * Returns the read group id that should be used for the input read and RG id.
- *
- * @deprecated replaced by getReadGroupId(SAMFileHeader, String)
- * */
- public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) {
- return getReadGroupId(reader.getFileHeader(), originalReadGroupId);
- }
-
- /** Returns the read group id that should be used for the input read and RG id. */
- public String getReadGroupId(final SAMFileHeader header, final String originalReadGroupId) {
- return this.samReadGroupIdTranslation.get(header).get(originalReadGroupId);
- }
-
- /**
- * @param reader one of the input files
- * @param originalProgramGroupId a program group ID from the above input file
- * @return new ID from the merged list of program groups in the output file
- * @deprecated replaced by getProgramGroupId(SAMFileHeader, String)
- */
- public String getProgramGroupId(final SAMFileReader reader, final String originalProgramGroupId) {
- return getProgramGroupId(reader.getFileHeader(), originalProgramGroupId);
- }
-
- /**
- * @param header one of the input headers
- * @param originalProgramGroupId a program group ID from the above input file
- * @return new ID from the merged list of program groups in the output file
- */
- public String getProgramGroupId(final SAMFileHeader header, final String originalProgramGroupId) {
- return this.samProgramGroupIdTranslation.get(header).get(originalProgramGroupId);
- }
-
- /** Returns true if there are read group duplicates within the merged headers. */
- public boolean hasReadGroupCollisions() {
- return this.hasReadGroupCollisions;
- }
-
- /** Returns true if there are program group duplicates within the merged headers. */
- public boolean hasProgramGroupCollisions() {
- return hasProgramGroupCollisions;
- }
-
- /** @return if we've merged the sequence dictionaries, return true */
- public boolean hasMergedSequenceDictionary() {
- return hasMergedSequenceDictionary;
- }
-
- /** Returns the merged header that should be written to any output merged file. */
- public SAMFileHeader getMergedHeader() {
- return this.mergedHeader;
- }
-
- /** Returns the collection of readers that this header merger is working with. May return null.
- * @deprecated replaced by getHeaders()
- */
- public Collection getReaders() {
- return this.readers;
- }
-
- /** Returns the collection of readers that this header merger is working with.
- */
- public Collection getHeaders() {
- return this.headers;
- }
-
- /**
- * Tells whether this header merger contains a given SAM file header. Note that header presence
- * is confirmed / blocked by == equality, rather than actually testing SAMFileHeader.equals(), for
- * reasons of performance.
- * @param header header to check for.
- * @return True if the header exists in this HeaderMerger. False otherwise.
- */
- boolean containsHeader(SAMFileHeader header) {
- for(SAMFileHeader headerMergerHeader: headers) {
- if(headerMergerHeader == header)
- return true;
- }
- return false;
- }
-
- /**
- * returns the new mapping for a specified reader, given it's old sequence index
- * @param reader the reader
- * @param oldReferenceSequenceIndex the old sequence (also called reference) index
- * @return the new index value
- * @deprecated replaced by getMergedSequenceIndex(SAMFileHeader, Integer)
- */
- public Integer getMergedSequenceIndex(SAMFileReader reader, Integer oldReferenceSequenceIndex) {
- return this.getMergedSequenceIndex(reader.getFileHeader(), oldReferenceSequenceIndex);
- }
-
- /**
- * Another mechanism for getting the new sequence index, for situations in which the reader is not available.
- * Note that if the SAMRecord has already had its header replaced with the merged header, this won't work.
- * @param header The original header for the input record in question.
- * @param oldReferenceSequenceIndex The original sequence index.
- * @return the new index value that is compatible with the merged sequence index.
- */
- public Integer getMergedSequenceIndex(final SAMFileHeader header, Integer oldReferenceSequenceIndex) {
- final Map mapping = this.samSeqDictionaryIdTranslationViaHeader.get(header);
- if (mapping == null) {
- throw new PicardException("No sequence dictionary mapping available for header: " + header);
- }
-
- final Integer newIndex = mapping.get(oldReferenceSequenceIndex);
- if (newIndex == null) {
- throw new PicardException("No mapping for reference index " + oldReferenceSequenceIndex + " from header: " + header);
- }
-
- return newIndex;
- }
-
-
- /**
- * Implementations of this interface are used by mergeHeaderRecords(..) to instantiate
- * specific subclasses of AbstractSAMHeaderRecord.
- */
- private static interface HeaderRecordFactory {
-
- /**
- * Constructs a new instance of RecordType.
- * @param id The id of the new record.
- * @param srcRecord Except for the id, the new record will be a copy of this source record.
- */
- public RecordType createRecord(final String id, RecordType srcRecord);
- }
-
- /**
- * Struct that groups together a subclass of AbstractSAMHeaderRecord with the
- * SAMFileHeader that it came from.
- */
- private static class HeaderRecordAndFileHeader {
- private RecordType headerRecord;
- private SAMFileHeader samFileHeader;
-
- public HeaderRecordAndFileHeader(RecordType headerRecord, SAMFileHeader samFileHeader) {
- this.headerRecord = headerRecord;
- this.samFileHeader = samFileHeader;
- }
-
- public RecordType getHeaderRecord() {
- return headerRecord;
- }
- public SAMFileHeader getFileHeader() {
- return samFileHeader;
- }
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/commandline/Gather.java b/public/java/src/org/broadinstitute/sting/commandline/Gather.java
index 59c3f50cb..d452f708e 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/Gather.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/Gather.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -34,5 +34,6 @@ import java.lang.annotation.*;
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD})
public @interface Gather {
- Class value();
+ Class value() default Gather.class;
+ boolean enabled() default true;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index f954d7650..ede8e9340 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -443,7 +443,7 @@ public class GenomeAnalysisEngine {
if(!readsDataSource.hasIndex() && intervals != null && !argCollection.allowIntervalsWithUnindexedBAM)
throw new UserException.CommandLineException("Cannot perform interval processing when reads are present but no index is available.");
- if(walker instanceof LocusWalker) {
+ if(walker instanceof LocusWalker || walker instanceof ActiveRegionWalker) {
if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
if(intervals == null)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
index dca4cc771..bcb726607 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
@@ -256,7 +256,7 @@ public class BAMScheduler implements Iterator {
// Naive algorithm: find all elements in current contig for proper schedule creation.
List lociInContig = new LinkedList();
for(GenomeLoc locus: loci) {
- if(dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded)
+ if(!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded)
lociInContig.add(locus);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java
index e377f865d..a95eb7b8e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java
@@ -31,13 +31,13 @@ import net.sf.samtools.util.BlockCompressedFilePointerUtil;
import net.sf.samtools.util.BlockCompressedInputStream;
import net.sf.samtools.util.RuntimeEOFException;
import net.sf.samtools.util.SeekableStream;
-import org.broad.tribble.util.BlockCompressedStreamConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
+import java.util.Iterator;
import java.util.LinkedList;
/**
@@ -120,6 +120,12 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
// TODO: Kill the region when all we want to do is start at the beginning of the stream and run to the end of the stream.
this.position = new SAMReaderPosition(reader,this,new GATKBAMFileSpan(new GATKChunk(0,Long.MAX_VALUE)));
+ // The block offsets / block positions guarantee that the ending offset/position in the data structure maps to
+ // the point in the file just following the last read. These two arrays should never be empty; initializing
+ // to 0 to match the position above.
+ this.blockOffsets.add(0);
+ this.blockPositions.add(0L);
+
try {
if(validate) {
System.out.printf("BlockInputStream %s: BGZF block validation mode activated%n",this);
@@ -143,34 +149,52 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
public long getFilePointer() {
long filePointer;
synchronized(lock) {
- if(buffer.remaining() > 0) {
- // If there's data in the buffer, figure out from whence it came.
- final long blockAddress = blockPositions.size() > 0 ? blockPositions.get(0) : 0;
- final int blockOffset = buffer.position();
- filePointer = blockAddress << 16 | blockOffset;
- }
- else {
- // Otherwise, find the next position to load.
- filePointer = position.getBlockAddress() << 16;
- }
+ // Find the current block within the input stream.
+ int blockIndex;
+ for(blockIndex = 0; blockIndex+1 < blockOffsets.size() && buffer.position() >= blockOffsets.get(blockIndex + 1); blockIndex++)
+ ;
+ filePointer = blockPositions.get(blockIndex) + (buffer.position()-blockOffsets.get(blockIndex));
}
- if(validatingInputStream != null && filePointer != validatingInputStream.getFilePointer())
- throw new ReviewedStingException(String.format("Position of input stream is invalid; expected (block address, block offset) = (%d,%d), got (%d,%d)",
- BlockCompressedFilePointerUtil.getBlockAddress(filePointer),BlockCompressedFilePointerUtil.getBlockOffset(filePointer),
- BlockCompressedFilePointerUtil.getBlockAddress(validatingInputStream.getFilePointer()),BlockCompressedFilePointerUtil.getBlockOffset(validatingInputStream.getFilePointer())));
+// if(validatingInputStream != null && filePointer != validatingInputStream.getFilePointer())
+// throw new ReviewedStingException(String.format("Position of input stream is invalid; expected (block address, block offset) = (%d,%d), got (%d,%d)",
+// BlockCompressedFilePointerUtil.getBlockAddress(validatingInputStream.getFilePointer()),BlockCompressedFilePointerUtil.getBlockOffset(validatingInputStream.getFilePointer()),
+// BlockCompressedFilePointerUtil.getBlockAddress(filePointer),BlockCompressedFilePointerUtil.getBlockOffset(filePointer)));
return filePointer;
}
public void seek(long target) {
- // TODO: Validate the seek point.
//System.out.printf("Thread %s, BlockInputStream %s: seeking to block %d, offset %d%n",Thread.currentThread().getId(),this,BlockCompressedFilePointerUtil.getBlockAddress(target),BlockCompressedFilePointerUtil.getBlockOffset(target));
synchronized(lock) {
clearBuffers();
- position.advancePosition(BlockCompressedFilePointerUtil.getBlockAddress(target));
- waitForBufferFill();
- buffer.position(BlockCompressedFilePointerUtil.getBlockOffset(target));
+
+ // Ensure that the position filled in by submitAccessPlan() is in sync with the seek target just specified.
+ position.advancePosition(target);
+
+ // If the position advances past the end of the target, that must mean that we seeked to a point at the end
+ // of one of the chunk list's subregions. Make a note of our current position and punt on loading any data.
+ if(target < position.getBlockAddress() << 16) {
+ blockOffsets.clear();
+ blockOffsets.add(0);
+ blockPositions.clear();
+ blockPositions.add(target);
+ }
+ else {
+ waitForBufferFill();
+ // A buffer fill will load the relevant data from the shard, but the buffer position still needs to be
+ // advanced as appropriate.
+ Iterator blockOffsetIterator = blockOffsets.descendingIterator();
+ Iterator blockPositionIterator = blockPositions.descendingIterator();
+ while(blockOffsetIterator.hasNext() && blockPositionIterator.hasNext()) {
+ final int blockOffset = blockOffsetIterator.next();
+ final long blockPosition = blockPositionIterator.next();
+ if((blockPosition >> 16) == (target >> 16) && (blockPosition&0xFFFF) < (target&0xFFFF)) {
+ buffer.position(blockOffset + (int)(target&0xFFFF)-(int)(blockPosition&0xFFFF));
+ break;
+ }
+ }
+ }
if(validatingInputStream != null) {
try {
@@ -191,14 +215,17 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
buffer.clear();
buffer.limit(0);
+ // Clear everything except the last block offset / position
blockOffsets.clear();
- blockPositions.clear();
+ blockOffsets.add(0);
+ while(blockPositions.size() > 1)
+ blockPositions.removeFirst();
}
public boolean eof() {
synchronized(lock) {
// TODO: Handle multiple empty BGZF blocks at end of the file.
- return position != null && position.getBlockAddress() >= length;
+ return position != null && (position.getBlockAddress() < 0 || position.getBlockAddress() >= length);
}
}
@@ -216,7 +243,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
// Assume that the access plan is going to tell us to start where we are and move forward.
// If this isn't the case, we'll soon receive a seek request and the buffer will be forced to reset.
if(this.position != null && position.getBlockAddress() < this.position.getBlockAddress())
- position.advancePosition(this.position.getBlockAddress());
+ position.advancePosition(this.position.getBlockAddress() << 16);
}
this.position = position;
}
@@ -225,14 +252,15 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
// Compact buffer to maximize storage space.
int bytesToRemove = 0;
- // Look ahead to see if we can compact away the first block in the series.
- while(blockOffsets.size() > 1 && buffer.position() < blockOffsets.get(1)) {
- bytesToRemove += blockOffsets.remove();
+ // Look ahead to see if we can compact away the first blocks in the series.
+ while(blockOffsets.size() > 1 && buffer.position() >= blockOffsets.get(1)) {
+ blockOffsets.remove();
blockPositions.remove();
+ bytesToRemove = blockOffsets.peek();
}
// If we end up with an empty block at the end of the series, compact this as well.
- if(buffer.remaining() == 0 && !blockOffsets.isEmpty() && buffer.position() >= blockOffsets.peek()) {
+ if(buffer.remaining() == 0 && blockOffsets.size() > 1 && buffer.position() >= blockOffsets.peek()) {
bytesToRemove += buffer.position();
blockOffsets.remove();
blockPositions.remove();
@@ -241,11 +269,17 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
int finalBufferStart = buffer.position() - bytesToRemove;
int finalBufferSize = buffer.remaining();
+ // Position the buffer to remove the unneeded data, and compact it away.
buffer.position(bytesToRemove);
buffer.compact();
+ // Reset the limits for reading.
buffer.position(finalBufferStart);
buffer.limit(finalBufferStart+finalBufferSize);
+
+ // Shift everything in the offset buffer down to accommodate the bytes removed from the buffer.
+ for(int i = 0; i < blockOffsets.size(); i++)
+ blockOffsets.set(i,blockOffsets.get(i)-bytesToRemove);
}
/**
@@ -253,6 +287,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
* MUST be called from a thread that is NOT the reader thread.
* @param incomingBuffer The data being pushed into this input stream.
* @param position target position for the data.
+ * @param filePosition the current position of the file pointer
*/
public void copyIntoBuffer(final ByteBuffer incomingBuffer, final SAMReaderPosition position, final long filePosition) {
synchronized(lock) {
@@ -262,7 +297,12 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
buffer.limit(buffer.capacity());
// Advance the position to take the most recent read into account.
- long lastReadPosition = position.getBlockAddress();
+ final long lastBlockAddress = position.getBlockAddress();
+ final int blockOffsetStart = position.getFirstOffsetInBlock();
+ final int blockOffsetEnd = position.getLastOffsetInBlock();
+
+ // Where did this read end? It either ended in the middle of a block (for a bounding chunk) or it ended at the start of the next block.
+ final long endOfRead = (blockOffsetEnd < incomingBuffer.remaining()) ? (lastBlockAddress << 16) | blockOffsetEnd : filePosition << 16;
byte[] validBytes = null;
if(validatingInputStream != null) {
@@ -277,7 +317,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
incomingBuffer.position(pos);
long currentFilePointer = validatingInputStream.getFilePointer();
- validatingInputStream.seek(lastReadPosition << 16);
+ validatingInputStream.seek(lastBlockAddress << 16);
validatingInputStream.read(validBytes);
validatingInputStream.seek(currentFilePointer);
@@ -286,7 +326,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
}
this.position = position;
- position.advancePosition(filePosition);
+ position.advancePosition(filePosition << 16);
if(buffer.remaining() < incomingBuffer.remaining()) {
//System.out.printf("Thread %s: waiting for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n",Thread.currentThread().getId(),buffer.remaining(),incomingBuffer.remaining());
@@ -294,12 +334,21 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
//System.out.printf("Thread %s: waited for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n", Thread.currentThread().getId(), buffer.remaining(), incomingBuffer.remaining());
}
- // Queue list of block offsets / block positions.
+ // Remove the last position in the list and add in the last read position, in case the two are different.
+ blockOffsets.removeLast();
blockOffsets.add(buffer.position());
- blockPositions.add(lastReadPosition);
+ blockPositions.removeLast();
+ blockPositions.add(lastBlockAddress << 16 | blockOffsetStart);
+ // Stream the buffer into the data stream.
+ incomingBuffer.position(blockOffsetStart);
+ incomingBuffer.limit(Math.min(incomingBuffer.limit(),blockOffsetEnd));
buffer.put(incomingBuffer);
+ // Then, add the last position read to the very end of the list, just past the end of the last buffer.
+ blockOffsets.add(buffer.position());
+ blockPositions.add(endOfRead);
+
// Set up the buffer for reading.
buffer.flip();
bufferFilled = true;
@@ -380,23 +429,21 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
lock.notify();
}
- if(validatingInputStream != null) {
- byte[] validBytes = new byte[length];
- try {
- validatingInputStream.read(validBytes,offset,length);
- for(int i = offset; i < offset+length; i++) {
- if(bytes[i] != validBytes[i]) {
- System.out.printf("Thread %s: preparing to throw an exception because contents don't match%n",Thread.currentThread().getId());
- throw new ReviewedStingException(String.format("Thread %s: blockInputStream %s attempting to return wrong set of bytes; mismatch at offset %d",Thread.currentThread().getId(),this,i));
- }
- }
- }
- catch(IOException ex) {
- throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
- }
- }
+// if(validatingInputStream != null) {
+// byte[] validBytes = new byte[length];
+// try {
+// validatingInputStream.read(validBytes,offset,length);
+// for(int i = offset; i < offset+length; i++) {
+// if(bytes[i] != validBytes[i])
+// throw new ReviewedStingException(String.format("Thread %s: blockInputStream %s attempting to return wrong set of bytes; mismatch at offset %d",Thread.currentThread().getId(),this,i));
+// }
+// }
+// catch(IOException ex) {
+// throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
+// }
+// }
- return length - remaining;
+ return eof() ? -1 : length-remaining;
}
public void close() {
@@ -424,7 +471,6 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
lock.wait();
}
catch(InterruptedException ex) {
- // TODO: handle me.
throw new ReviewedStingException("Interrupt occurred waiting for buffer to fill",ex);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
index dc703ff23..244438a59 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
@@ -88,7 +88,7 @@ public class GATKBAMIndex {
seek(0);
final byte[] buffer = readBytes(4);
if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) {
- throw new RuntimeException("Invalid file header in BAM index " + mFile +
+ throw new ReviewedStingException("Invalid file header in BAM index " + mFile +
": " + new String(buffer));
}
@@ -112,7 +112,7 @@ public class GATKBAMIndex {
openIndexFile();
if (referenceSequence >= sequenceCount)
- throw new ReviewedStingException("Invalid sequence number " + referenceSequence);
+ throw new ReviewedStingException("Invalid sequence number " + referenceSequence + " in index file " + mFile);
skipToSequence(referenceSequence);
@@ -183,12 +183,12 @@ public class GATKBAMIndex {
public int getLevelForBin(final Bin bin) {
GATKBin gatkBin = new GATKBin(bin);
if(gatkBin.getBinNumber() >= MAX_BINS)
- throw new SAMException("Tried to get level for invalid bin.");
+ throw new ReviewedStingException("Tried to get level for invalid bin in index file " + mFile);
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
return i;
}
- throw new SAMException("Unable to find correct bin for bin "+bin);
+ throw new ReviewedStingException("Unable to find correct bin for bin " + bin + " in index file " + mFile);
}
/**
@@ -352,7 +352,7 @@ public class GATKBAMIndex {
fileChannel.read(buffer);
}
catch(IOException ex) {
- throw new ReviewedStingException("Index: unable to read bytes from index file.");
+ throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
}
}
@@ -379,7 +379,7 @@ public class GATKBAMIndex {
fileChannel.position(fileChannel.position() + count);
}
catch(IOException ex) {
- throw new ReviewedStingException("Index: unable to reposition file channel.");
+ throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
}
}
@@ -388,7 +388,7 @@ public class GATKBAMIndex {
fileChannel.position(position);
}
catch(IOException ex) {
- throw new ReviewedStingException("Index: unable to reposition of file channel.");
+ throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index 2e243b847..c0537334d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -556,7 +556,7 @@ public class SAMDataSource {
*/
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
// Set up merging to dynamically merge together multiple BAMs.
- MergingSamRecordIterator mergingIterator = readers.createMergingIterator();
+ Map> iteratorMap = new HashMap>();
for(SAMReaderID id: getReaderIDs()) {
CloseableIterator iterator = null;
@@ -573,9 +573,13 @@ public class SAMDataSource {
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
if(shard.getGenomeLocs().size() > 0)
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
- mergingIterator.addIterator(readers.getReader(id),iterator);
+ iteratorMap.put(readers.getReader(id), iterator);
}
+ MergingSamRecordIterator mergingIterator = readers.createMergingIterator(iteratorMap);
+
+
+
return applyDecoratingIterators(shard.getReadMetrics(),
enableVerification,
readProperties.useOriginalBaseQualities(),
@@ -847,8 +851,13 @@ public class SAMDataSource {
return headerMerger.getReadGroupId(header,originalReadGroupID);
}
- public MergingSamRecordIterator createMergingIterator() {
- return new MergingSamRecordIterator(headerMerger,readers.values(),true);
+ /**
+ * Creates a new merging iterator from the given map, with the given header.
+ * @param iteratorMap A map of readers to iterators.
+ * @return An iterator which will merge those individual iterators.
+ */
+ public MergingSamRecordIterator createMergingIterator(final Map> iteratorMap) {
+ return new MergingSamRecordIterator(headerMerger,iteratorMap,true);
}
/**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java
index f9f6539a7..0a6173c1e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java
@@ -75,6 +75,22 @@ class SAMReaderPosition {
return nextBlockAddress;
}
+ /**
+ * Retrieves the first offset of interest in the block returned by getBlockAddress().
+ * @return First block of interest in this segment.
+ */
+ public int getFirstOffsetInBlock() {
+ return (nextBlockAddress == positionIterator.peek().getBlockStart()) ? positionIterator.peek().getBlockOffsetStart() : 0;
+ }
+
+ /**
+ * Retrieves the last offset of interest in the block returned by getBlockAddress().
+ * @return First block of interest in this segment.
+ */
+ public int getLastOffsetInBlock() {
+ return (nextBlockAddress == positionIterator.peek().getBlockEnd()) ? positionIterator.peek().getBlockOffsetEnd() : 65536;
+ }
+
public void reset() {
initialize();
}
@@ -95,26 +111,27 @@ class SAMReaderPosition {
* @param filePosition The current position within the file.
*/
void advancePosition(final long filePosition) {
- nextBlockAddress = filePosition;
+ nextBlockAddress = filePosition >> 16;
// Check the current file position against the iterator; if the iterator is before the current file position,
// draw the iterator forward. Remember when performing the check that coordinates are half-open!
- try {
- while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek())) {
- positionIterator.next();
- // Check to see if the iterator has more data available.
- if(positionIterator.hasNext() && filePosition < positionIterator.peek().getBlockStart()) {
- nextBlockAddress = positionIterator.peek().getBlockStart();
- break;
- }
+ while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek())) {
+ positionIterator.next();
+
+ // If the block iterator has shot past the file pointer, bring the file pointer flush with the start of the current block.
+ if(positionIterator.hasNext() && filePosition < positionIterator.peek().getChunkStart()) {
+ nextBlockAddress = positionIterator.peek().getBlockStart();
+ //System.out.printf("SAMReaderPosition: next block address advanced to %d%n",nextBlockAddress);
+ break;
}
}
- catch(Exception ex) {
- throw new ReviewedStingException("");
- }
+
+ // If we've shot off the end of the block pointer, notify consumers that iteration is complete.
+ if(!positionIterator.hasNext())
+ nextBlockAddress = -1;
}
private boolean isFilePositionPastEndOfChunk(final long filePosition, final GATKChunk chunk) {
- return (filePosition > chunk.getBlockEnd() || (filePosition == chunk.getBlockEnd() && chunk.getBlockOffsetEnd() == 0));
+ return filePosition >= chunk.getChunkEnd();
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
index 39e1bdc72..eec440820 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
@@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
import java.util.Collection;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
index ff5e1064b..774b532f3 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
@@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
+import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.SampleUtils;
@@ -55,7 +56,6 @@ public class LinearMicroScheduler extends MicroScheduler {
traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) {
- LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
@@ -77,6 +77,12 @@ public class LinearMicroScheduler extends MicroScheduler {
done = walker.isDone();
}
+ // Special function call to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine
+ if( traversalEngine instanceof TraverseActiveRegions ) {
+ final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
+ accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
+ }
+
Object result = accumulator.finishTraversal();
printOnTraversalDone(result);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index d013db7e8..508099708 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -128,6 +128,8 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
traversalEngine = new TraverseDuplicates();
} else if (walker instanceof ReadPairWalker) {
traversalEngine = new TraverseReadPairs();
+ } else if (walker instanceof ActiveRegionWalker) {
+ traversalEngine = new TraverseActiveRegions();
} else {
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java
index d36a3b576..6acaadd50 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java
@@ -27,8 +27,8 @@ import java.util.concurrent.Future;
public class TreeReducer implements Callable {
private HierarchicalMicroScheduler microScheduler;
private TreeReducible walker;
- private final Future lhs;
- private final Future rhs;
+ private Future lhs;
+ private Future rhs;
/**
* Create a one-sided reduce. Result will be a simple pass-through of the result.
@@ -99,6 +99,10 @@ public class TreeReducer implements Callable {
long endTime = System.currentTimeMillis();
+ // Constituent bits of this tree reduces are no longer required. Throw them away.
+ this.lhs = null;
+ this.rhs = null;
+
microScheduler.reportTreeReduceTime( endTime - startTime );
return result;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
new file mode 100644
index 000000000..01bfe396a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -0,0 +1,213 @@
+package org.broadinstitute.sting.gatk.traversals;
+
+import net.sf.samtools.SAMRecord;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.WalkerManager;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.providers.*;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
+import org.broadinstitute.sting.gatk.walkers.DataSource;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.LinkedList;
+import java.util.Queue;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 12/9/11
+ */
+
+public class TraverseActiveRegions extends TraversalEngine,LocusShardDataProvider> {
+ /**
+ * our log, which we want to capture anything from this class
+ */
+ protected static Logger logger = Logger.getLogger(TraversalEngine.class);
+
+ private final Queue workQueue = new LinkedList();
+ private final LinkedHashSet myReads = new LinkedHashSet();
+
+ @Override
+ protected String getTraversalType() {
+ return "active regions";
+ }
+
+ @Override
+ public T traverse( final ActiveRegionWalker walker,
+ final LocusShardDataProvider dataProvider,
+ T sum) {
+ logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
+
+ LocusView locusView = getLocusView( walker, dataProvider );
+
+ int minStart = Integer.MAX_VALUE;
+ final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
+
+ if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
+
+ final ArrayList isActiveList = new ArrayList();
+
+ //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
+ ReferenceOrderedView referenceOrderedDataView = null;
+ if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
+ referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
+ else
+ referenceOrderedDataView = (RodLocusView)locusView;
+
+ // We keep processing while the next reference location is within the interval
+ while( locusView.hasNext() ) {
+ final AlignmentContext locus = locusView.next();
+ GenomeLoc location = locus.getLocation();
+
+ dataProvider.getShard().getReadMetrics().incrementNumIterations();
+
+ if ( locus.hasExtendedEventPileup() ) {
+ // if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
+ // associated with the current site), we need to update the location. The updated location still starts
+ // at the current genomic position, but it has to span the length of the longest deletion (if any).
+ location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
+
+ // it is possible that the new expanded location spans the current shard boundary; the next method ensures
+ // that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
+ // the view has all the bases we are gonna need. If the location fits within the current view bounds,
+ // the next call will not do anything to the view:
+ referenceView.expandBoundsToAccomodateLoc(location);
+ }
+
+ // create reference context. Note that if we have a pileup of "extended events", the context will
+ // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
+ final ReferenceContext refContext = referenceView.getReferenceContext(location);
+
+ // Iterate forward to get all reference ordered data covering this location
+ final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
+
+ // Call the walkers isActive function for this locus and add them to the list to be integrated later
+ final boolean isActive = walker.isActive( tracker, refContext, locus );
+ isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser()) );
+
+ // Grab all the previously unseen reads from this pileup and add them to the massive read list
+ for( final PileupElement p : locus.getBasePileup() ) {
+ final SAMRecord read = p.getRead();
+ if( !myReads.contains(read) ) {
+ myReads.add(read);
+ }
+ }
+
+ // If this is the last pileup for this shard then need to calculate the minimum alignment start so that
+ // we know which active regions in the work queue are now safe to process
+ if( !locusView.hasNext() ) {
+ for( final PileupElement p : locus.getBasePileup() ) {
+ final SAMRecord read = p.getRead();
+ if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
+ }
+ }
+ printProgress(dataProvider.getShard(),locus.getLocation());
+ }
+
+ // Take the individual isActive calls and integrate them into contiguous active regions and
+ // add these blocks of work to the work queue
+ final ArrayList activeRegions = integrateActiveList( isActiveList );
+ logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
+ workQueue.addAll( activeRegions );
+ }
+
+ while( workQueue.peek().getLocation().getStop() < minStart ) {
+ final ActiveRegion activeRegion = workQueue.remove();
+ sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
+ }
+
+ return sum;
+ }
+
+ // Special function called in LinearMicroScheduler to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine
+ public T endTraversal( final Walker walker, T sum) {
+ while( workQueue.peek() != null ) {
+ final ActiveRegion activeRegion = workQueue.remove();
+ sum = processActiveRegion( activeRegion, myReads, workQueue, sum, (ActiveRegionWalker) walker );
+ }
+
+ return sum;
+ }
+
+ private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet reads, final Queue workQueue, final T sum, final ActiveRegionWalker walker ) {
+ final ArrayList placedReads = new ArrayList();
+ for( final SAMRecord read : reads ) {
+ final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
+ if( activeRegion.getLocation().overlapsP( readLoc ) ) {
+ // The region with the highest amount of overlap is chosen as the primary region for the read (ties are broken by choosing the right-most region)
+ long maxOverlap = activeRegion.getLocation().sizeOfOverlap( readLoc );
+ ActiveRegion bestRegion = activeRegion;
+ for( final ActiveRegion otherRegionToTest : workQueue ) {
+ if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
+ maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap(readLoc);
+ bestRegion = otherRegionToTest;
+ }
+ }
+ bestRegion.add( (GATKSAMRecord) read, true );
+
+ // The read is also added to all other regions it overlaps, but marked as non-primary
+ if( !bestRegion.equals(activeRegion) ) {
+ activeRegion.add( (GATKSAMRecord) read, false );
+ }
+ for( final ActiveRegion otherRegionToTest : workQueue ) {
+ if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
+ activeRegion.add( (GATKSAMRecord) read, false );
+ }
+ }
+ placedReads.add( read );
+ }
+ }
+ reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
+
+ logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLocation());
+ final M x = walker.map( activeRegion, null ); // BUGBUG: tracker needs to be filled in and passed to the walker
+ return walker.reduce( x, sum );
+ }
+
+ /**
+ * Gets the best view of loci for this walker given the available data.
+ * @param walker walker to interrogate.
+ * @param dataProvider Data with which to drive the locus view.
+ * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
+ */
+ private LocusView getLocusView( Walker walker, LocusShardDataProvider dataProvider ) {
+ DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
+ if( dataSource == DataSource.READS )
+ return new CoveredLocusView(dataProvider);
+ else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
+ return new AllLocusView(dataProvider);
+ else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
+ return new RodLocusView(dataProvider);
+ else
+ throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
+ }
+
+ // integrate active regions into contiguous chunks based on active status
+ private ArrayList integrateActiveList( final ArrayList activeList ) {
+ final ArrayList returnList = new ArrayList();
+ ActiveRegion prevLocus = activeList.remove(0);
+ ActiveRegion startLocus = prevLocus;
+ for( final ActiveRegion thisLocus : activeList ) {
+ if( prevLocus.isActive != thisLocus.isActive ) {
+ returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
+ prevLocus.isActive, engine.getGenomeLocParser() ) );
+ startLocus = thisLocus;
+ }
+ prevLocus = thisLocus;
+ }
+ // output the last region if necessary
+ if( startLocus != prevLocus ) {
+ returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
+ prevLocus.isActive, engine.getGenomeLocParser() ) );
+ }
+ return returnList;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
new file mode 100644
index 000000000..d2891c959
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@@ -0,0 +1,29 @@
+package org.broadinstitute.sting.gatk.walkers;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 12/7/11
+ */
+
+@By(DataSource.READS)
+@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
+@PartitionBy(PartitionType.READ)
+public abstract class ActiveRegionWalker extends Walker {
+ // Do we actually want to operate on the context?
+ public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
+ return true; // We are keeping all the reads
+ }
+
+ // Determine active status over the AlignmentContext
+ public abstract boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
+
+ // Map over the ActiveRegion
+ public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
index ac69738d3..0702b08c1 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
@@ -30,18 +30,19 @@ import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
import java.util.Collection;
+import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
-import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-
/**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
*
@@ -70,12 +71,21 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
* -I input2.bam \
* --read_filter MappingQualityZero
*
+ * // Prints the first 2000 reads in the BAM file
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T PrintReads \
* -o output.bam \
* -I input.bam \
* -n 2000
+ *
+ * // Downsamples BAM file to 25%
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ * -R ref.fasta \
+ * -T PrintReads \
+ * -o output.bam \
+ * -I input.bam \
+ * -ds 0.25
*
*
*/
@@ -95,9 +105,18 @@ public class PrintReadsWalker extends ReadWalker {
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
String platform = null;
+ /**
+ * Only prints the first n reads of the file
+ */
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
int nReadsToPrint = -1;
+ /**
+ * Downsamples the bam file by the given ratio, printing only approximately the given percentage of reads. The downsampling is balanced (over the entire coverage)
+ */
+ @Argument(fullName = "downsample_coverage", shortName = "ds", doc="Downsample BAM to desired coverage", required = false)
+ public double downsampleRatio = 1.0;
+
/**
* Only reads from samples listed in the provided file(s) will be included in the output.
*/
@@ -112,6 +131,8 @@ public class PrintReadsWalker extends ReadWalker {
private TreeSet samplesToChoose = new TreeSet();
private boolean SAMPLES_SPECIFIED = false;
+
+ Random random;
/**
* The initialize function.
@@ -132,13 +153,15 @@ public class PrintReadsWalker extends ReadWalker {
if(!samplesToChoose.isEmpty()) {
SAMPLES_SPECIFIED = true;
}
+
+ random = GenomeAnalysisEngine.getRandomGenerator();
}
/**
* The reads filter function.
*
- * @param ref the reference bases that correspond to our read, if a reference was provided
+ * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't
*/
@@ -177,13 +200,14 @@ public class PrintReadsWalker extends ReadWalker {
nReadsToPrint--; // n > 0 means there are still reads to be printed.
}
- return true;
- }
+ // if downsample option is turned off (= 1) then don't waste time getting the next random number.
+ return (downsampleRatio == 1 || random.nextDouble() < downsampleRatio);
+ }
/**
* The reads map function.
*
- * @param ref the reference bases that correspond to our read, if a reference was provided
+ * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return the read itself
*/
@@ -194,6 +218,7 @@ public class PrintReadsWalker extends ReadWalker {
/**
* reduceInit is called once before any calls to the map function. We use it here to setup the output
* bam file, if it was specified on the command line
+ *
* @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
*/
public SAMFileWriter reduceInit() {
@@ -202,7 +227,8 @@ public class PrintReadsWalker extends ReadWalker {
/**
* given a read and a output location, reduce by emitting the read
- * @param read the read itself
+ *
+ * @param read the read itself
* @param output the output source
* @return the SAMFileWriter, so that the next reduce can emit to the same source
*/
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
index aa743f86f..1594c92cb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
@@ -35,7 +35,7 @@ import java.util.*;
public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
- private final static boolean DEBUG = false;
+ // private final static boolean DEBUG = false;
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
@@ -70,47 +70,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
return genotypeLikelihoods;
}
-
- final static double approximateLog10SumLog10(double[] vals) {
- if ( vals.length < 2 )
- throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10");
-
- double approx = approximateLog10SumLog10(vals[0], vals[1]);
- for ( int i = 2; i < vals.length; i++ )
- approx = approximateLog10SumLog10(approx, vals[i]);
- return approx;
- }
-
- final static double approximateLog10SumLog10(double small, double big) {
- // make sure small is really the smaller value
- if ( small > big ) {
- final double t = big;
- big = small;
- small = t;
- }
-
- if (small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
- return big;
-
- if (big >= small + MathUtils.MAX_JACOBIAN_TOLERANCE)
- return big;
-
- // OK, so |y-x| < tol: we use the following identity then:
- // we need to compute log10(10^x + 10^y)
- // By Jacobian logarithm identity, this is equal to
- // max(x,y) + log10(1+10^-abs(x-y))
- // we compute the second term as a table lookup
- // with integer quantization
- // we have pre-stored correction for 0,0.1,0.2,... 10.0
- //final int ind = (int)(((big-small)/JACOBIAN_LOG_TABLE_STEP)); // hard rounding
- int ind = (int)(Math.round((big-small)/MathUtils.JACOBIAN_LOG_TABLE_STEP)); // hard rounding
-
- //double z =Math.log10(1+Math.pow(10.0,-diff));
- //System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
- return big + MathUtils.jacobianLogTable[ind];
- }
-
-
// -------------------------------------------------------------------------------------
//
// Multi-allelic implementation.
@@ -218,12 +177,18 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
ACqueue.add(zeroSet);
indexesToACset.put(zeroSet.ACcounts, zeroSet);
+ // optimization: create the temporary storage for computing L(j,k) just once
+ final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1;
+ final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies];
+ for ( int i = 0; i < maxPossibleDependencies; i++ )
+ tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY;
+
// keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY;
while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods
final ExactACset set = ACqueue.remove();
- final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
+ final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods);
// adjust max likelihood seen if needed
maxLog10L = Math.max(maxLog10L, log10LofKs);
@@ -238,20 +203,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final Queue ACqueue,
final HashMap indexesToACset,
final double[][] log10AlleleFrequencyPriors,
- final AlleleFrequencyCalculationResult result) {
+ final AlleleFrequencyCalculationResult result,
+ final double[][] tempLog10ConformationLikelihoods) {
- if ( DEBUG )
- System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
+ //if ( DEBUG )
+ // System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
// compute the log10Likelihoods
- computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result);
+ computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods);
// clean up memory
if ( !preserveData ) {
for ( ExactACcounts index : set.dependentACsetsToDelete ) {
indexesToACset.put(index, null);
- if ( DEBUG )
- System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
+ //if ( DEBUG )
+ // System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
}
}
@@ -259,8 +225,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// can we abort early because the log10Likelihoods are so small?
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
- if ( DEBUG )
- System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
+ //if ( DEBUG )
+ // System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
// no reason to keep this data around because nothing depends on it
if ( !preserveData )
@@ -303,8 +269,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate
// over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early)
if ( !preserveData && lastSet == null ) {
- if ( DEBUG )
- System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
+ //if ( DEBUG )
+ // System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
}
if ( lastSet != null )
@@ -350,7 +316,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final ArrayList genotypeLikelihoods,
final HashMap indexesToACset,
final double[][] log10AlleleFrequencyPriors,
- final AlleleFrequencyCalculationResult result) {
+ final AlleleFrequencyCalculationResult result,
+ final double[][] tempLog10ConformationLikelihoods) {
set.log10Likelihoods[0] = 0.0; // the zero case
final int totalK = set.getACsum();
@@ -362,38 +329,41 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
// k > 0 for at least one k
else {
- // all possible likelihoods for a given cell from which to choose the max
- final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
- final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it
+ // deal with the non-AA possible conformations
+ int conformationIndex = 1;
+ for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) {
+ //if ( DEBUG )
+ // System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
- for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
- final double[] gl = genotypeLikelihoods.get(j);
- final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
+ ExactACset dependent = indexesToACset.get(mapping.getKey());
- // initialize
- for ( int i = 0; i < numPaths; i++ )
- // TODO -- Arrays.fill?
- // todo -- is this even necessary? Why not have as else below?
- log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY;
+ for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
- // deal with the AA case first
- if ( totalK < 2*j-1 )
- log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
-
- // deal with the other possible conformations now
- if ( totalK <= 2*j ) { // skip impossible conformations
- int conformationIndex = 1;
- for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) {
- if ( DEBUG )
- System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
- log10ConformationLikelihoods[conformationIndex++] =
- determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()];
- }
+ if ( totalK <= 2*j ) { // skip impossible conformations
+ final double[] gl = genotypeLikelihoods.get(j);
+ tempLog10ConformationLikelihoods[j][conformationIndex] =
+ determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()];
+ } else {
+ tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY;
+ }
}
- final double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods);
+ conformationIndex++;
+ }
- // finally, update the L(j,k) value
+ // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
+ final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
+ for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
+
+ if ( totalK < 2*j-1 ) {
+ final double[] gl = genotypeLikelihoods.get(j);
+ tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
+ } else {
+ tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY;
+ }
+
+ final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
+ final double log10Max = MathUtils.approximateLog10SumLog10(tempLog10ConformationLikelihoods[j], numPaths);
set.log10Likelihoods[j] = log10Max - logDenominator;
}
}
@@ -415,10 +385,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
int AC = set.ACcounts.getCounts()[i];
- result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
+ result.log10AlleleFrequencyLikelihoods[i][AC] = MathUtils.approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
final double prior = log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
- result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
+ result.log10AlleleFrequencyPosteriors[i][AC] = MathUtils.approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
}
}
}
@@ -564,7 +534,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
lastK = k;
maxLog10L = Math.max(maxLog10L, log10LofK);
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
- if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
+ //if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
done = true;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
index b30a25414..ace780dd0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
@@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@@ -72,25 +73,28 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
this.logger = logger;
}
- /**
- * Must be overridden by concrete subclasses
- *
- * @param tracker rod data
- * @param ref reference context
- * @param contexts stratified alignment contexts
- * @param contextType stratified context type
- * @param priors priors to use for GLs
- * @param alternateAlleleToUse the alternate allele to use, null if not set
- * @param useBAQedPileup should we use the BAQed pileup or the raw one?
- * @return variant context where genotypes are no-called but with GLs
- */
- public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
- ReferenceContext ref,
- Map contexts,
- AlignmentContextUtils.ReadOrientation contextType,
- GenotypePriors priors,
- Allele alternateAlleleToUse,
- boolean useBAQedPileup);
+ /**
+ * Must be implemented by concrete subclasses
+ *
+ * @param tracker rod data
+ * @param ref reference context
+ * @param contexts stratified alignment contexts
+ * @param contextType stratified context type
+ * @param priors priors to use for GLs
+ * @param alternateAlleleToUse the alternate allele to use, null if not set
+ * @param useBAQedPileup should we use the BAQed pileup or the raw one?
+ * @param locParser Genome Loc Parser
+ * @return variant context where genotypes are no-called but with GLs
+ */
+ public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
+ ReferenceContext ref,
+ Map contexts,
+ AlignmentContextUtils.ReadOrientation contextType,
+ GenotypePriors priors,
+ Allele alternateAlleleToUse,
+ boolean useBAQedPileup,
+ GenomeLocParser locParser);
+
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index fe2086d47..0756caf03 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@@ -54,17 +55,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private final boolean getAlleleListFromVCF;
private boolean DEBUG = false;
-
+ private final boolean doMultiAllelicCalls;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
-
+ private final int maxAlternateAlleles;
private PairHMMIndelErrorModel pairModel;
private static ThreadLocal>> indelLikelihoodMap =
new ThreadLocal>>() {
- protected synchronized HashMap> initialValue() {
- return new HashMap>();
- }
- };
+ protected synchronized HashMap> initialValue() {
+ return new HashMap>();
+ }
+ };
private LinkedHashMap haplotypeMap;
@@ -81,12 +82,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
- UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.BANDED_INDEL_COMPUTATION);
+ UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
alleleList = new ArrayList();
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
+ maxAlternateAlleles = UAC.MAX_ALTERNATE_ALLELES;
+ doMultiAllelicCalls = UAC.MULTI_ALLELIC;
haplotypeMap = new LinkedHashMap();
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
@@ -95,7 +98,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private ArrayList computeConsensusAlleles(ReferenceContext ref,
Map contexts,
- AlignmentContextUtils.ReadOrientation contextType) {
+ AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) {
Allele refAllele=null, altAllele=null;
GenomeLoc loc = ref.getLocus();
ArrayList aList = new ArrayList();
@@ -114,7 +117,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
return aList;
-
+
for ( Map.Entry sample : contexts.entrySet() ) {
// todo -- warning, can be duplicating expensive partition here
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
@@ -126,9 +129,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) {
//SAMRecord read = p.getRead();
- GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
+ GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null)
- continue;
+ continue;
if(ReadUtils.is454Read(read)) {
continue;
}
@@ -208,63 +211,69 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
}
-/* if (DEBUG) {
- int icount = indelPileup.getNumberOfInsertions();
- int dcount = indelPileup.getNumberOfDeletions();
- if (icount + dcount > 0)
- {
- List> eventStrings = indelPileup.getEventStringsWithCounts(ref.getBases());
- System.out.format("#ins: %d, #del:%d\n", insCount, delCount);
-
- for (int i=0 ; i < eventStrings.size() ; i++ ) {
- System.out.format("%s:%d,",eventStrings.get(i).first,eventStrings.get(i).second);
- // int k=0;
- }
- System.out.println();
- }
- } */
}
+ Collection vcs = new ArrayList();
int maxAlleleCnt = 0;
String bestAltAllele = "";
+
for (String s : consensusIndelStrings.keySet()) {
- int curCnt = consensusIndelStrings.get(s);
- if (curCnt > maxAlleleCnt) {
- maxAlleleCnt = curCnt;
- bestAltAllele = s;
+ int curCnt = consensusIndelStrings.get(s), stop = 0;
+ // if observed count is above the minimum threshold, we will genotype this allele
+ if (curCnt < minIndelCountForGenotyping)
+ continue;
+
+ if (s.startsWith("D")) {
+ // get deletion length
+ int dLen = Integer.valueOf(s.substring(1));
+ // get ref bases of accurate deletion
+ int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
+ stop = loc.getStart() + dLen;
+ byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
+
+ if (Allele.acceptableAlleleBases(refBases)) {
+ refAllele = Allele.create(refBases,true);
+ altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
+ }
+ }
+ else {
+ // insertion case
+ if (Allele.acceptableAlleleBases(s)) {
+ refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
+ altAllele = Allele.create(s, false);
+ stop = loc.getStart();
+ }
}
-// if (DEBUG)
-// System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) );
- } //gdebug-
- if (maxAlleleCnt < minIndelCountForGenotyping)
- return aList;
- if (bestAltAllele.startsWith("D")) {
- // get deletion length
- int dLen = Integer.valueOf(bestAltAllele.substring(1));
- // get ref bases of accurate deletion
- int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
+ ArrayList vcAlleles = new ArrayList();
+ vcAlleles.add(refAllele);
+ vcAlleles.add(altAllele);
- //System.out.println(new String(ref.getBases()));
- byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
+ final VariantContextBuilder builder = new VariantContextBuilder().source("");
+ builder.loc(loc.getContig(), loc.getStart(), stop);
+ builder.alleles(vcAlleles);
+ builder.referenceBaseForIndel(ref.getBase());
+ builder.noGenotypes();
+ if (doMultiAllelicCalls)
+ vcs.add(builder.make());
+ else {
+ if (curCnt > maxAlleleCnt) {
+ maxAlleleCnt = curCnt;
+ vcs.clear();
+ vcs.add(builder.make());
+ }
- if (Allele.acceptableAlleleBases(refBases)) {
- refAllele = Allele.create(refBases,true);
- altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
}
- else {
- // insertion case
- if (Allele.acceptableAlleleBases(bestAltAllele)) {
- refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
- altAllele = Allele.create(bestAltAllele, false);
- }
- }
- if (refAllele != null && altAllele != null) {
- aList.add(0,refAllele);
- aList.add(1,altAllele);
- }
+
+ if (vcs.isEmpty())
+ return aList; // nothing else to do, no alleles passed minimum count criterion
+
+ VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
+
+ aList = new ArrayList(mergedVC.getAlleles());
+
return aList;
}
@@ -277,7 +286,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Allele alternateAlleleToUse,
- boolean useBAQedPileup) {
+ boolean useBAQedPileup, GenomeLocParser locParser) {
if ( tracker == null )
return null;
@@ -294,17 +303,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
haplotypeMap.clear();
if (getAlleleListFromVCF) {
- for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
- if( vc_input != null &&
- allowableTypes.contains(vc_input.getType()) &&
- ref.getLocus().getStart() == vc_input.getStart()) {
- vc = vc_input;
- break;
- }
- }
- // ignore places where we don't have a variant
- if ( vc == null )
- return null;
+ for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
+ if( vc_input != null &&
+ allowableTypes.contains(vc_input.getType()) &&
+ ref.getLocus().getStart() == vc_input.getStart()) {
+ vc = vc_input;
+ break;
+ }
+ }
+ // ignore places where we don't have a variant
+ if ( vc == null )
+ return null;
alleleList.clear();
if (ignoreSNPAllelesWhenGenotypingIndels) {
@@ -323,7 +332,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
else {
- alleleList = computeConsensusAlleles(ref,contexts, contextType);
+ alleleList = computeConsensusAlleles(ref,contexts, contextType, locParser);
if (alleleList.isEmpty())
return null;
}
@@ -340,7 +349,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (alleleList.isEmpty())
return null;
-
+
refAllele = alleleList.get(0);
altAllele = alleleList.get(1);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
index eee89674a..81c766e4d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@@ -30,10 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.utils.BaseUtils;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException;
@@ -66,7 +63,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
final AlignmentContextUtils.ReadOrientation contextType,
final GenotypePriors priors,
final Allele alternateAlleleToUse,
- final boolean useBAQedPileup) {
+ final boolean useBAQedPileup,
+ final GenomeLocParser locParser) {
if ( !(priors instanceof DiploidSNPGenotypePriors) )
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGBoundAF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGBoundAF.java
index e40054c9f..99d55bc69 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGBoundAF.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGBoundAF.java
@@ -204,6 +204,6 @@ public class UGBoundAF extends RodWalker {
return Math.log10(s_2 + (s_2 - s)/15.0);
}
- return ExactAFCalculationModel.approximateLog10SumLog10(simpAux(likelihoods,a,c,eps/2,s_l,fa,fc,fd,cap-1),simpAux(likelihoods, c, b, eps / 2, s_r, fc, fb, fe, cap - 1));
+ return MathUtils.approximateLog10SumLog10(simpAux(likelihoods,a,c,eps/2,s_l,fa,fc,fd,cap-1),simpAux(likelihoods, c, b, eps / 2, s_r, fc, fb, fe, cap - 1));
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
index 5713432b4..16159393f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@@ -109,7 +109,7 @@ public class UnifiedArgumentCollection {
* For advanced users only.
*/
@Advanced
- @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles (SNPs only)", required = false)
+ @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles", required = false)
public boolean MULTI_ALLELIC = false;
/**
@@ -146,8 +146,8 @@ public class UnifiedArgumentCollection {
public int INDEL_HAPLOTYPE_SIZE = 80;
@Hidden
- @Argument(fullName = "bandedIndel", shortName = "bandedIndel", doc = "Banded Indel likelihood computation", required = false)
- public boolean BANDED_INDEL_COMPUTATION = false;
+ @Argument(fullName = "noBandedIndel", shortName = "noBandedIndel", doc = "Don't do Banded Indel likelihood computation", required = false)
+ public boolean DONT_DO_BANDED_INDEL_COMPUTATION = false;
@Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
@@ -184,7 +184,7 @@ public class UnifiedArgumentCollection {
// todo- arguments to remove
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
- uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
+ uac.DONT_DO_BANDED_INDEL_COMPUTATION = DONT_DO_BANDED_INDEL_COMPUTATION;
uac.MULTI_ALLELIC = MULTI_ALLELIC;
return uac;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 34be88dbb..5d73e8d28 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -243,7 +243,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
- return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
+ return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
}
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java
index a271d3c35..3c7c6f00c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java
@@ -46,6 +46,7 @@ import java.util.*;
public class VariantSummary extends VariantEvaluator implements StandardEval {
final protected static Logger logger = Logger.getLogger(VariantSummary.class);
+ /** Indels with size greater than this value are tallied in the CNV column */
private final static int MAX_INDEL_LENGTH = 50;
private final static double MIN_CNV_OVERLAP = 0.5;
private VariantEvalWalker walker;
@@ -196,14 +197,16 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
}
private final boolean overlapsKnownCNV(VariantContext cnv) {
- final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
- IntervalTree intervalTree = knownCNVs.get(loc.getContig());
+ if ( knownCNVs != null ) {
+ final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
+ IntervalTree intervalTree = knownCNVs.get(loc.getContig());
- final Iterator> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
- while ( nodeIt.hasNext() ) {
- final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
- if ( overlapP > MIN_CNV_OVERLAP )
- return true;
+ final Iterator> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
+ while ( nodeIt.hasNext() ) {
+ final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
+ if ( overlapP > MIN_CNV_OVERLAP )
+ return true;
+ }
}
return false;
@@ -224,7 +227,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
allVariantCounts.inc(type, ALL);
// type specific calculations
- if ( type == Type.SNP ) {
+ if ( type == Type.SNP && eval.isBiallelic() ) {
titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
titvTable.inc(type, ALL);
}
diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java
index cdfc329e8..71640c66a 100644
--- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java
+++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -70,17 +70,18 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
" * Short name of %1$s%n" +
" * @return Short name of %1$s%n" +
" */%n" +
- "def %3$s = this.%1$s%n" +
+ "%5$sdef %3$s = this.%1$s%n" +
"%n" +
"/**%n" +
" * Short name of %1$s%n" +
" * @param value Short name of %1$s%n" +
" */%n" +
- "def %4$s(value: %2$s) { this.%1$s = value }%n",
+ "%5$sdef %4$s(value: %2$s) { this.%1$s = value }%n",
getFieldName(),
getFieldType(),
getShortFieldGetter(),
- getShortFieldSetter());
+ getShortFieldSetter(),
+ getPrivacy());
}
protected static final String REQUIRED_TEMPLATE = " + required(\"%1$s\", %3$s, spaceSeparated=true, escape=true, format=%2$s)";
@@ -135,11 +136,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
new IntervalFileArgumentField(argumentDefinition),
new IntervalStringArgumentField(argumentDefinition));
- // ROD Bindings are set by the RodBindField
- } else if (RodBindArgumentField.ROD_BIND_FIELD.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
- // TODO: Once everyone is using @Allows and @Requires correctly, we can stop blindly allowing Triplets
- return Arrays.asList(new RodBindArgumentField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, Tribble.STANDARD_INDEX_EXTENSION));
- //return Collections.emptyList();
+ } else if (NumThreadsArgumentField.NUM_THREADS_FIELD.equals(argumentDefinition.fullName)) {
+ return Arrays.asList(new NumThreadsArgumentField(argumentDefinition));
} else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam"));
@@ -166,10 +164,13 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
fields.add(new OutputArgumentField(argumentDefinition, gatherClass));
- if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType))
+ if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)) {
fields.add(new SAMFileWriterIndexArgumentField(argumentDefinition));
- else if (VCFWriter.class.isAssignableFrom(argumentDefinition.argumentType))
+ fields.add(new SAMFileWriterMD5ArgumentField(argumentDefinition));
+ }
+ else if (VCFWriter.class.isAssignableFrom(argumentDefinition.argumentType)) {
fields.add(new VCFWriterIndexArgumentField(argumentDefinition));
+ }
return fields;
@@ -228,7 +229,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
@Override protected String getRawFieldName() { return super.getRawFieldName() + "String"; }
@Override protected String getFullName() { return super.getFullName() + "String"; }
@Override protected String getRawShortFieldName() { return super.getRawShortFieldName() + "String"; }
- @Override protected String getFieldType() { return "List[String]"; }
+ @Override protected String getFieldType() { return "Seq[String]"; }
@Override protected String getDefaultValue() { return "Nil"; }
@Override public String getCommandLineTemplate() { return REPEAT_TEMPLATE; }
@@ -250,7 +251,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
@Override protected Class> getInnerType() { return File.class; }
- @Override protected String getFieldType() { return isMultiValued() ? "List[File]" : "File"; }
+ @Override protected String getFieldType() { return isMultiValued() ? "Seq[File]" : "File"; }
@Override protected String getDefaultValue() { return isMultiValued() ? "Nil" : "_"; }
}
@@ -294,7 +295,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
@Override protected Class> getInnerType() { return mapType(argumentDefinition.componentType); }
- @Override protected String getFieldType() { return String.format("List[%s]", getType(getInnerType())); }
+ @Override protected String getFieldType() { return String.format("Seq[%s]", getType(getInnerType())); }
@Override protected String getDefaultValue() { return "Nil"; }
@Override protected String getCommandLineTemplate() { return REPEAT_TEMPLATE; }
}
@@ -336,17 +337,16 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
// Allows the user to specify the track name, track type, and the file.
- public static class RodBindArgumentField extends ArgumentDefinitionField {
- public static final String ROD_BIND_FIELD = "rodBind";
+ public static class NumThreadsArgumentField extends OptionedArgumentField {
+ public static final String NUM_THREADS_FIELD = "num_threads";
- public RodBindArgumentField(ArgumentDefinition argumentDefinition) {
- super(argumentDefinition);
+ public NumThreadsArgumentField(ArgumentDefinition argumentDefinition) {
+ super(argumentDefinition, false);
}
- @Override protected Class> getInnerType() { return null; } // RodBind does not need to be imported.
- @Override protected String getFieldType() { return "List[RodBind]"; }
- @Override protected String getDefaultValue() { return "Nil"; }
- @Override protected String getCommandLineTemplate() {
- return " + repeat(\"%1$s\", %3$s, formatPrefix=RodBind.formatCommandLineParameter, spaceSeparated=true, escape=true, format=%2$s)";
+
+ @Override
+ protected String getFreezeFields() {
+ return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%n");
}
}
@@ -356,7 +356,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
super(argumentDefinition);
}
@Override protected Class> getInnerType() { return null; } // TaggedFile does not need to be imported.
- @Override protected String getFieldType() { return argumentDefinition.isMultiValued ? "List[File]" : "File"; }
+ @Override protected String getFieldType() { return argumentDefinition.isMultiValued ? "Seq[File]" : "File"; }
@Override protected String getDefaultValue() { return argumentDefinition.isMultiValued ? "Nil" : "_"; }
@Override protected String getCommandLineTemplate() {
if (argumentDefinition.isMultiValued) {
@@ -395,10 +395,11 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
@Override protected String getFullName() { return this.indexFieldName; }
@Override protected boolean isRequired() { return false; }
- @Override protected String getFieldType() { return "List[File]"; }
+ @Override protected String getFieldType() { return "Seq[File]"; }
@Override protected String getDefaultValue() { return "Nil"; }
@Override protected Class> getInnerType() { return File.class; }
@Override protected String getRawFieldName() { return this.indexFieldName; }
+ @Override protected String getPrivacy() { return "private "; }
@Override protected String getFreezeFields() {
if (originalIsMultiValued) {
if (originalSuffix == null) {
@@ -434,53 +435,69 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
}
- // Tracks an automatically generated index
- private static abstract class OutputIndexArgumentField extends ArgumentField {
- protected final String indexFieldName;
+ // Tracks an automatically generated index, md5, etc.
+ private static abstract class AuxilliaryOutputArgumentField extends ArgumentField {
protected final String originalFieldName;
- public OutputIndexArgumentField(ArgumentDefinition originalArgumentDefinition) {
- this.indexFieldName = originalArgumentDefinition.fullName + "Index";
+ protected final String auxFieldName;
+ protected final String auxFieldLabel;
+ public AuxilliaryOutputArgumentField(ArgumentDefinition originalArgumentDefinition, String auxFieldLabel) {
this.originalFieldName = originalArgumentDefinition.fullName;
+ this.auxFieldName = originalArgumentDefinition.fullName + auxFieldLabel;
+ this.auxFieldLabel = auxFieldLabel;
}
@Override protected Class extends Annotation> getAnnotationIOClass() { return Output.class; }
@Override public String getCommandLineAddition() { return ""; }
- @Override protected String getDoc() { return "Automatically generated index for " + this.originalFieldName; }
- @Override protected String getFullName() { return this.indexFieldName; }
+ @Override protected String getDoc() { return String.format("Automatically generated %s for %s", auxFieldLabel.toLowerCase(), this.originalFieldName); }
+ @Override protected String getFullName() { return this.auxFieldName; }
@Override protected boolean isRequired() { return false; }
@Override protected String getFieldType() { return "File"; }
@Override protected String getDefaultValue() { return "_"; }
@Override protected Class> getInnerType() { return File.class; }
- @Override protected String getRawFieldName() { return this.indexFieldName; }
+ @Override protected String getRawFieldName() { return this.auxFieldName; }
+ @Override protected String getPrivacy() { return "private "; }
@Override public boolean isGather() { return true; }
@Override protected String getGatherAnnotation() {
- return String.format("@Gather(classOf[AutoIndexGatherFunction])%n");
+ return String.format("@Gather(enabled=false)%n");
}
}
- private static class VCFWriterIndexArgumentField extends OutputIndexArgumentField {
+ private static class VCFWriterIndexArgumentField extends AuxilliaryOutputArgumentField {
public VCFWriterIndexArgumentField(ArgumentDefinition originalArgumentDefinition) {
- super(originalArgumentDefinition);
+ super(originalArgumentDefinition, "Index");
}
@Override protected String getFreezeFields() {
return String.format(
("if (%2$s != null)%n" +
" if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(%2$s.getPath))%n" +
" %1$s = new File(%2$s.getPath + \"%3$s\")%n"),
- indexFieldName, originalFieldName, Tribble.STANDARD_INDEX_EXTENSION);
+ auxFieldName, originalFieldName, Tribble.STANDARD_INDEX_EXTENSION);
}
}
- private static class SAMFileWriterIndexArgumentField extends OutputIndexArgumentField {
+ private static class SAMFileWriterIndexArgumentField extends AuxilliaryOutputArgumentField {
public SAMFileWriterIndexArgumentField(ArgumentDefinition originalArgumentDefinition) {
- super(originalArgumentDefinition);
+ super(originalArgumentDefinition, "Index");
}
@Override protected String getFreezeFields() {
return String.format(
("if (%2$s != null)%n" +
" if (!%3$s)%n" +
" %1$s = new File(%2$s.getPath.stripSuffix(\".bam\") + \"%4$s\")%n"),
- indexFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix);
+ auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix);
+ }
+ }
+
+ private static class SAMFileWriterMD5ArgumentField extends AuxilliaryOutputArgumentField {
+ public SAMFileWriterMD5ArgumentField(ArgumentDefinition originalArgumentDefinition) {
+ super(originalArgumentDefinition, "MD5");
+ }
+ @Override protected String getFreezeFields() {
+ return String.format(
+ ("if (%2$s != null)%n" +
+ " if (%3$s)%n" +
+ " %1$s = new File(%2$s.getPath + \"%4$s\")%n"),
+ auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME, ".md5");
}
}
diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java
index e90933504..2428a13a8 100644
--- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java
+++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -56,7 +56,7 @@ public abstract class ArgumentField {
return String.format("%n" +
"/** %s */%n" +
"@%s(fullName=\"%s\", shortName=\"%s\", doc=\"%s\", required=%s, exclusiveOf=\"%s\", validation=\"%s\")%n" +
- "%svar %s: %s = %s%n" +
+ "%s%svar %s: %s = %s%n" +
"%s",
getDoc(),
getAnnotationIOClass().getSimpleName(),
@@ -66,7 +66,7 @@ public abstract class ArgumentField {
isRequired(),
getExclusiveOf(),
getValidation(),
- getGatherAnnotation(), getFieldName(), getFieldType(), getDefaultValue(),
+ getGatherAnnotation(), getPrivacy(), getFieldName(), getFieldType(), getDefaultValue(),
getDefineAddition());
}
@@ -143,6 +143,9 @@ public abstract class ArgumentField {
/** @return True if this field uses @Gather. */
public boolean isGather() { return false; }
+ /** @return Privacy for the field. */
+ protected String getPrivacy() { return ""; }
+
/** @return The raw field name, which will be checked against scala build in types. */
protected abstract String getRawFieldName();
/** @return The field name checked against reserved words. */
diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
index 9c40fb976..a3f80af1c 100644
--- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
+++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -34,13 +34,11 @@ import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
-import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
-import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.Walker;
@@ -85,7 +83,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
"%n" +
"/** A dynamicly generated list of classes that the GATK Extensions depend on, but are not be detected by default by BCEL. */%n" +
"class %s {%n" +
- "val types = List(%n%s)%n" +
+ "val types = Seq(%n%s)%n" +
"}%n";
@Output(fullName="output_directory", shortName="outDir", doc="Directory to output the generated scala", required=true)
@@ -95,10 +93,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();
WalkerManager walkerManager = new WalkerManager();
FilterManager filterManager = new FilterManager();
- // HACK: We're currently relying on the fact that RMDTrackBuilder is used only from RMD type lookups, not
- // RMD track location. Therefore, no sequence dictionary is required. In the future, we should separate
- // RMD track lookups from track creation.
- RMDTrackBuilder trackBuilder = new RMDTrackBuilder(null,null,ValidationExclusion.TYPE.ALL);
/**
* Required main method implementation.
@@ -147,7 +141,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
String clpConstructor = String.format("analysisName = \"%s\"%njavaMainClass = \"%s\"%n", clpClassName, clp.getName());
writeClass("org.broadinstitute.sting.queue.function.JavaCommandLineFunction", clpClassName,
- false, clpConstructor, ArgumentDefinitionField.getArgumentFields(parser,clp), dependents, false);
+ false, clpConstructor, ArgumentDefinitionField.getArgumentFields(parser,clp), dependents);
if (clp == CommandLineGATK.class) {
for (Entry>> walkersByPackage: walkerManager.getWalkerNamesByPackage(false).entrySet()) {
@@ -169,7 +163,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
}
writeClass(GATK_EXTENSIONS_PACKAGE_NAME + "." + clpClassName, walkerName,
- isScatter, constructor, argumentFields, dependents, true);
+ isScatter, constructor, argumentFields, dependents);
} catch (Exception e) {
throw new ReviewedStingException("Error generating wrappers for walker " + walkerType, e);
}
@@ -242,8 +236,8 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
*/
private void writeClass(String baseClass, String className, boolean isScatter,
String constructor, List extends ArgumentField> argumentFields,
- Set> dependents, boolean isGATKWalker) throws IOException {
- String content = getContent(CLASS_TEMPLATE, baseClass, className, constructor, isScatter, "", argumentFields, dependents, isGATKWalker);
+ Set> dependents) throws IOException {
+ String content = getContent(CLASS_TEMPLATE, baseClass, className, constructor, isScatter, "", argumentFields, dependents);
writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
}
@@ -257,7 +251,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
*/
private void writeFilter(String className, List extends ArgumentField> argumentFields, Set> dependents) throws IOException {
String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction",
- className, "", false, String.format(" + \" -read_filter %s\"", className), argumentFields, dependents, false);
+ className, "", false, String.format(" + \" -read_filter %s\"", className), argumentFields, dependents);
writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
}
@@ -351,8 +345,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
*/
private static String getContent(String scalaTemplate, String baseClass, String className,
String constructor, boolean isScatter, String commandLinePrefix,
- List extends ArgumentField> argumentFields, Set> dependents,
- boolean isGATKWalker) {
+ List extends ArgumentField> argumentFields, Set> dependents) {
StringBuilder arguments = new StringBuilder();
StringBuilder commandLine = new StringBuilder(commandLinePrefix);
@@ -376,9 +369,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
if (isGather)
importSet.add("import org.broadinstitute.sting.commandline.Gather");
- // Needed for ShellUtils.escapeShellArgument()
- importSet.add("import org.broadinstitute.sting.queue.util.ShellUtils");
-
// Sort the imports so that the are always in the same order.
List sortedImports = new ArrayList(importSet);
Collections.sort(sortedImports);
@@ -386,10 +376,8 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
StringBuffer freezeFieldOverride = new StringBuffer();
for (String freezeField: freezeFields)
freezeFieldOverride.append(freezeField);
- if (freezeFieldOverride.length() > 0 || isGATKWalker) {
- freezeFieldOverride.insert(0, String.format("override def freezeFieldValues = {%nsuper.freezeFieldValues%n"));
- if ( isGATKWalker )
- freezeFieldOverride.append(String.format("if ( num_threads.isDefined ) nCoresRequest = num_threads%n"));
+ if (freezeFieldOverride.length() > 0) {
+ freezeFieldOverride.insert(0, String.format("override def freezeFieldValues() {%nsuper.freezeFieldValues()%n"));
freezeFieldOverride.append(String.format("}%n%n"));
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
index 345161416..6941b888b 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
@@ -145,7 +145,7 @@ public class GenomeLoc implements Comparable, Serializable, HasGenome
}
return new GenomeLoc(getContig(), this.contigIndex,
- Math.min(getStart(), that.getStart()),
+ Math.min( getStart(), that.getStart() ),
Math.max( getStop(), that.getStop()) );
}
@@ -465,4 +465,8 @@ public class GenomeLoc implements Comparable, Serializable, HasGenome
private final static double overlapPercent(final GenomeLoc gl1, final GenomeLoc gl2) {
return (1.0 * gl1.intersect(gl2).size()) / gl1.size();
}
+
+ public long sizeOfOverlap( final GenomeLoc that ) {
+ return ( this.overlapsP(that) ? Math.min( getStop(), that.getStop() ) - Math.max( getStart(), that.getStart() ) : 0L );
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
index 737f4bb5f..408faf61a 100644
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@@ -56,17 +56,62 @@ public class MathUtils {
private MathUtils() {
}
- @Requires({"d > 0.0"})
- public static int fastPositiveRound(double d) {
- return (int) (d + 0.5);
+ // A fast implementation of the Math.round() method. This method does not perform
+ // under/overflow checking, so this shouldn't be used in the general case (but is fine
+ // if one is already making those checks before calling into the rounding).
+ public static int fastRound(double d) {
+ return (d > 0) ? (int)(d + 0.5d) : (int)(d - 0.5d);
}
- public static int fastRound(double d) {
- if (d > 0.0) {
- return fastPositiveRound(d);
- } else {
- return -1 * fastPositiveRound(-1 * d);
+ public static double approximateLog10SumLog10(final double[] vals) {
+ return approximateLog10SumLog10(vals, vals.length);
+ }
+
+ public static double approximateLog10SumLog10(final double[] vals, final int endIndex) {
+
+ final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex);
+ double approxSum = vals[maxElementIndex];
+ if ( approxSum == Double.NEGATIVE_INFINITY )
+ return approxSum;
+
+ for ( int i = 0; i < endIndex; i++ ) {
+ if ( i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY )
+ continue;
+
+ final double diff = approxSum - vals[i];
+ if ( diff < MathUtils.MAX_JACOBIAN_TOLERANCE ) {
+ // See notes from the 2-input implementation below
+ final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding
+ approxSum += MathUtils.jacobianLogTable[ind];
+ }
+ }
+
+ return approxSum;
+ }
+
+ public static double approximateLog10SumLog10(double small, double big) {
+ // make sure small is really the smaller value
+ if ( small > big ) {
+ final double t = big;
+ big = small;
+ small = t;
}
+
+ if ( small == Double.NEGATIVE_INFINITY || big == Double.NEGATIVE_INFINITY )
+ return big;
+
+ final double diff = big - small;
+ if ( diff >= MathUtils.MAX_JACOBIAN_TOLERANCE )
+ return big;
+
+ // OK, so |y-x| < tol: we use the following identity then:
+ // we need to compute log10(10^x + 10^y)
+ // By Jacobian logarithm identity, this is equal to
+ // max(x,y) + log10(1+10^-abs(x-y))
+ // we compute the second term as a table lookup with integer quantization
+ // we have pre-stored correction for 0,0.1,0.2,... 10.0
+ final int ind = fastRound(diff / MathUtils.JACOBIAN_LOG_TABLE_STEP); // hard rounding
+ return big + MathUtils.jacobianLogTable[ind];
}
public static double sum(Collection numbers) {
@@ -541,11 +586,15 @@ public class MathUtils {
return normalizeFromLog10(array, false);
}
- public static int maxElementIndex(double[] array) {
+ public static int maxElementIndex(final double[] array) {
+ return maxElementIndex(array, array.length);
+ }
+
+ public static int maxElementIndex(final double[] array, final int endIndex) {
if (array == null) throw new IllegalArgumentException("Array cannot be null!");
int maxI = -1;
- for (int i = 0; i < array.length; i++) {
+ for (int i = 0; i < endIndex; i++) {
if (maxI == -1 || array[i] > array[maxI])
maxI = i;
}
@@ -553,11 +602,15 @@ public class MathUtils {
return maxI;
}
- public static int maxElementIndex(int[] array) {
+ public static int maxElementIndex(final int[] array) {
+ return maxElementIndex(array, array.length);
+ }
+
+ public static int maxElementIndex(final int[] array, int endIndex) {
if (array == null) throw new IllegalArgumentException("Array cannot be null!");
int maxI = -1;
- for (int i = 0; i < array.length; i++) {
+ for (int i = 0; i < endIndex; i++) {
if (maxI == -1 || array[i] > array[maxI])
maxI = i;
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRead.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRead.java
new file mode 100644
index 000000000..8d08a29b6
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRead.java
@@ -0,0 +1,19 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 1/4/12
+ */
+
+public class ActiveRead {
+ final public GATKSAMRecord read;
+ final public boolean isPrimaryRegion;
+
+ ActiveRead( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
+ this.read = read;
+ this.isPrimaryRegion = isPrimaryRegion;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
new file mode 100644
index 000000000..e8908480c
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
@@ -0,0 +1,55 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+import net.sf.picard.reference.IndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.HasGenomeLocation;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.ArrayList;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 1/4/12
+ */
+
+public class ActiveRegion implements HasGenomeLocation {
+
+ private final ArrayList reads = new ArrayList();
+ private byte[] reference = null;
+ private final GenomeLoc loc;
+ private GenomeLoc referenceLoc = null;
+ private final GenomeLocParser genomeLocParser;
+ public final boolean isActive;
+
+ public ActiveRegion( final GenomeLoc loc, final boolean isActive, final GenomeLocParser genomeLocParser ) {
+ this.loc = loc;
+ this.isActive = isActive;
+ this.genomeLocParser = genomeLocParser;
+ referenceLoc = loc;
+ }
+
+ // add each read to the bin and extend the reference genome loc if needed
+ public void add( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
+ referenceLoc = referenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
+ reads.add( new ActiveRead(read, isPrimaryRegion) );
+ }
+
+ public ArrayList getReads() { return reads; }
+
+ public byte[] getReference( final IndexedFastaSequenceFile referenceReader ) {
+ // set up the reference if we haven't done so yet
+ if ( reference == null ) {
+ reference = referenceReader.getSubsequenceAt(referenceLoc.getContig(), referenceLoc.getStart(), referenceLoc.getStop()).getBases();
+ }
+
+ return reference;
+ }
+
+ public GenomeLoc getLocation() { return loc; }
+
+ public GenomeLoc getReferenceLocation() { return referenceLoc; }
+
+ public int size() { return reads.size(); }
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java
index c299511db..84ecc7fcd 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/SortingVCFWriterBase.java
@@ -27,10 +27,8 @@ package org.broadinstitute.sting.utils.codecs.vcf;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import java.util.Comparator;
-import java.util.PriorityQueue;
-import java.util.Set;
-import java.util.TreeSet;
+import java.util.*;
+import java.util.concurrent.PriorityBlockingQueue;
/**
* This class writes VCF files, allowing records to be passed in unsorted.
@@ -39,20 +37,26 @@ import java.util.TreeSet;
public abstract class SortingVCFWriterBase implements VCFWriter {
// The VCFWriter to which to actually write the sorted VCF records
- private VCFWriter innerWriter = null;
+ private final VCFWriter innerWriter;
// the current queue of un-emitted records
- private PriorityQueue queue = null;
+ private final Queue queue;
// The locus until which we are permitted to write out (inclusive)
protected Integer mostUpstreamWritableLoc;
protected static final int BEFORE_MOST_UPSTREAM_LOC = 0; // No real locus index is <= 0
// The set of chromosomes already passed over and to which it is forbidden to return
- private Set finishedChromosomes = null;
+ private final Set finishedChromosomes;
// Should we call innerWriter.close() in close()
- private boolean takeOwnershipOfInner;
+ private final boolean takeOwnershipOfInner;
+
+ // --------------------------------------------------------------------------------
+ //
+ // Constructors
+ //
+ // --------------------------------------------------------------------------------
/**
* create a local-sorting VCF writer, given an inner VCF writer to write to
@@ -62,16 +66,27 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
*/
public SortingVCFWriterBase(VCFWriter innerWriter, boolean takeOwnershipOfInner) {
this.innerWriter = innerWriter;
- this.queue = new PriorityQueue(50, new VariantContextComparator());
- this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
this.finishedChromosomes = new TreeSet();
this.takeOwnershipOfInner = takeOwnershipOfInner;
+
+ // has to be PriorityBlockingQueue to be thread-safe
+ // see http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic
+ this.queue = new PriorityBlockingQueue(50, new VariantContextComparator());
+
+ this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
}
public SortingVCFWriterBase(VCFWriter innerWriter) {
this(innerWriter, false); // by default, don't own inner
}
+ // --------------------------------------------------------------------------------
+ //
+ // public interface functions
+ //
+ // --------------------------------------------------------------------------------
+
+ @Override
public void writeHeader(VCFHeader header) {
innerWriter.writeHeader(header);
}
@@ -79,6 +94,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
/**
* attempt to close the VCF file; we need to flush the queue first
*/
+ @Override
public void close() {
stopWaitingToSort();
@@ -86,27 +102,14 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
innerWriter.close();
}
- private void stopWaitingToSort() {
- emitRecords(true);
- mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
- }
-
- protected void emitSafeRecords() {
- emitRecords(false);
- }
-
- protected void noteCurrentRecord(VariantContext vc) {
- // did the user break the contract by giving a record too late?
- if (mostUpstreamWritableLoc != null && vc.getStart() < mostUpstreamWritableLoc) // went too far back, since may have already written anything that is <= mostUpstreamWritableLoc
- throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getChr() + ":" + vc.getStart() + " was just added.");
- }
/**
* add a record to the file
*
* @param vc the Variant Context object
*/
- public void add(VariantContext vc) {
+ @Override
+ public synchronized void add(VariantContext vc) {
/* Note that the code below does not prevent the successive add()-ing of: (chr1, 10), (chr20, 200), (chr15, 100)
since there is no implicit ordering of chromosomes:
*/
@@ -125,7 +128,43 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
emitSafeRecords();
}
- private void emitRecords(boolean emitUnsafe) {
+ /**
+ * Gets a string representation of this object.
+ * @return a string representation of this object
+ */
+ @Override
+ public String toString() {
+ return getClass().getName();
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // protected interface functions for subclasses to use
+ //
+ // --------------------------------------------------------------------------------
+
+ private synchronized void stopWaitingToSort() {
+ emitRecords(true);
+ mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
+ }
+
+ protected synchronized void emitSafeRecords() {
+ emitRecords(false);
+ }
+
+ protected void noteCurrentRecord(VariantContext vc) {
+ // did the user break the contract by giving a record too late?
+ if (mostUpstreamWritableLoc != null && vc.getStart() < mostUpstreamWritableLoc) // went too far back, since may have already written anything that is <= mostUpstreamWritableLoc
+ throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getChr() + ":" + vc.getStart() + " was just added.");
+ }
+
+ // --------------------------------------------------------------------------------
+ //
+ // private implementation functions
+ //
+ // --------------------------------------------------------------------------------
+
+ private synchronized void emitRecords(boolean emitUnsafe) {
while (!queue.isEmpty()) {
VCFRecord firstRec = queue.peek();
@@ -140,15 +179,6 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
}
}
- /**
- * Gets a string representation of this object.
- * @return a string representation of this object
- */
- @Override
- public String toString() {
- return getClass().getName();
- }
-
private static class VariantContextComparator implements Comparator {
public int compare(VCFRecord r1, VCFRecord r2) {
return r1.vc.getStart() - r2.vc.getStart();
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
index 7fa2f6230..cc0b1ae67 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
@@ -186,17 +186,21 @@ public class ReadUtils {
* @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another contig.
*/
public static Integer getAdaptorBoundary(final SAMRecord read) {
+ final int MAXIMUM_ADAPTOR_LENGTH = 8;
final int insertSize = Math.abs(read.getInferredInsertSize()); // the inferred insert size can be negative if the mate is mapped before the read (so we take the absolute value)
- if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another
- return null; // chromosome or unmapped pairs
-
- int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read)
+ if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs
+ return null;
+
+ Integer adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read)
if (read.getReadNegativeStrandFlag())
adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header)
else
adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header)
+ if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) )
+ adaptorBoundary = null; // we are being conservative by not allowing the adaptor boundary to go beyond what we believe is the maximum size of an adaptor
+
return adaptorBoundary;
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java
similarity index 74%
rename from public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java
rename to public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java
index 8cd10048a..0fcaad3bf 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsWalkerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsUnitTest.java
@@ -1,20 +1,19 @@
package org.broadinstitute.sting.gatk.walkers;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.sam.ArtificialReadsTraversal;
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.SAMFileHeader;
-
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertTrue;
-
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
/*
* Copyright (c) 2009 The Broad Institute
@@ -44,11 +43,11 @@ import org.testng.annotations.Test;
/**
* @author aaron
*
- * Class PrintReadsWalkerUnitTest
+ * Class PrintReadsUnitTest
*
* This tests the print reads walker, using the artificial reads traversal
*/
-public class PrintReadsWalkerUnitTest extends BaseTest {
+public class PrintReadsUnitTest extends BaseTest {
/**
* our private fake reads traversal. This traversal seeds the
@@ -60,19 +59,23 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
private ReferenceContext bases = null;
//private ReferenceContext ref = new ReferenceContext()
+ PrintReadsWalker walker;
+ ArtificialSAMFileWriter writer;
+
@BeforeMethod
public void before() {
trav = new ArtificialReadsTraversal();
readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads;
+
+ walker = new PrintReadsWalker();
+ writer = new ArtificialSAMFileWriter();
+ walker.out = writer;
+ walker.initialize();
}
/** test that we get out the same number of reads we put in */
@Test
public void testReadCount() {
- PrintReadsWalker walker = new PrintReadsWalker();
- ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
- walker.out = writer;
-
trav.traverse(walker, null, writer);
assertEquals(writer.getRecords().size(), readTotal);
}
@@ -80,10 +83,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
/** test that we're ok with a null read */
@Test
public void testNullRead() {
- PrintReadsWalker walker = new PrintReadsWalker();
- ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
- walker.out = writer;
-
SAMRecord rec = walker.map(bases, null, null);
assertTrue(rec == null);
}
@@ -91,10 +90,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
/** tes that we get the read we put into the map function */
@Test
public void testReturnRead() {
- PrintReadsWalker walker = new PrintReadsWalker();
- ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
- walker.out = writer;
-
SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
SAMRecord ret = walker.map(bases, rec, null);
@@ -102,20 +97,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
assertTrue(ret.getReadName().equals(rec.getReadName()));
}
- /** test that the read makes it to the output source */
- @Test
- public void testReducingRead() {
- PrintReadsWalker walker = new PrintReadsWalker();
- ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
- walker.out = writer;
-
- SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
- SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
- SAMRecord ret = walker.map(bases, null,null);
- walker.reduce(ret,writer);
-
- assertTrue(writer.getRecords().size() == 1);
- }
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
index 0aec94663..174a46bdd 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("e70eb5f80c93e366dcbe3cf684c154e4"));
+ Arrays.asList("604328867fc9aaf3e71fa0f9ca2ba5c9"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("1e52761fdff73a5361b5eb0a6e5d9dad"));
+ Arrays.asList("bbde8c92d27ad2a7ec1ff2d095d459eb"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
- Arrays.asList("bb4eebfaffc230cb8a31e62e7b53a300"));
+ Arrays.asList("8ec9f79cab84f26d8250f00d99d18aac"));
executeTest("test exclude annotations", spec);
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java
index 02332b64e..c9e91e664 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java
@@ -62,7 +62,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
public void testCallableLociWalker3() {
String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s";
WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
- Arrays.asList("4496551d4493857e5153d8172965e527", "b0667e31af9aec02eaf73ca73ec16937"));
+ Arrays.asList("b7d26a470ef906590249f2fa45fd6bdd", "da431d393f7c2b2b3e27556b86c1dbc7"));
executeTest("formatBed lots of arguments", spec);
}
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java
index f2f72978f..1c58346b4 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageIntegrationTest.java
@@ -55,25 +55,25 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
spec.setOutputFileLocation(baseOutputFile);
// now add the expected files that get generated
- spec.addAuxFile("2f072fd8b41b5ac1108797f89376c797", baseOutputFile);
- spec.addAuxFile("d17ac7cc0b58ba801d2b0727a363d615", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
- spec.addAuxFile("c05190c9e6239cdb1cd486edcbc23505", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
+ spec.addAuxFile("0f9603eb1ca4a26828e82d8c8f4991f6", baseOutputFile);
+ spec.addAuxFile("51e6c09a307654f43811af35238fb179", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
+ spec.addAuxFile("229b9b5bc2141c86dbc69c8acc9eba6a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics"));
- spec.addAuxFile("c94a52b4e73a7995319e0b570c80d2f7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
- spec.addAuxFile("1970a44efb7ace4e51a37f0bd2dc84d1", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
- spec.addAuxFile("c321c542be25359d2e26d45cbeb6d7ab", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
- spec.addAuxFile("9023cc8939777d515cd2895919a99688", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
- spec.addAuxFile("3597b69e90742c5dd7c83fbc74d079f3", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
+ spec.addAuxFile("e69ee59f447816c025c09a56e321cef8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
+ spec.addAuxFile("fa054b665d1ae537ada719da7713e11b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
+ spec.addAuxFile("28dec9383b3a323a5ce7d96d62712917", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
+ spec.addAuxFile("a836b92ac17b8ff9788e2aaa9116b5d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
+ spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
- spec.addAuxFile("1a6ea3aa759fb154ccc4e171ebca9d02", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
- spec.addAuxFile("b492644ff06b4ffb044d5075cd168abf", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
- spec.addAuxFile("77cef87dc4083a7b60b7a7b38b4c0bd8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
- spec.addAuxFile("8e1adbe37b98bb2271ba13932d5c947f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
- spec.addAuxFile("761d2f9daf2ebaf43abf65c8fd2fcd05", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
+ spec.addAuxFile("4656c8797696cf5ef0cdc5971271236a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
+ spec.addAuxFile("6f1d7f2120a4ac524c6026498f45295a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
+ spec.addAuxFile("69c424bca013159942337b67fdf31ff8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
+ spec.addAuxFile("6909d50a7da337cd294828b32b945eb8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
+ spec.addAuxFile("a395dafde101971d2b9e5ddb6cd4b7d0", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
- spec.addAuxFile("0582b4681dbc02ece2dfe2752dcfd228", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
- spec.addAuxFile("0685214965bf1863f7ce8de2e38af060", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
- spec.addAuxFile("7a0cd8a5ebaaa82621fd3b5aed9c32fe", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
+ spec.addAuxFile("185b910e499c08a8b88dd3ed1ac9e8ec", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
+ spec.addAuxFile("d5d11b686689467b5a8836f0a07f447d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
+ spec.addAuxFile("ad1a2775a31b1634daf64e691676bb96", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
execute("testBaseOutputNoFiltering",spec);
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 7c0dba558..32fa8679e 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
- Arrays.asList("66ed60c6c1190754abd8a0a9d1d8d61e"));
+ Arrays.asList("d61c7055bd09024abb8902bde6bd3960"));
executeTest("test MultiSample Pilot1", spec);
}
@@ -189,7 +189,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
- Arrays.asList("2b2729414ae855d390e7940956745bce"));
+ Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));
executeTest(String.format("test multiple technologies"), spec);
}
@@ -208,7 +208,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
- Arrays.asList("95c6120efb92e5a325a5cec7d77c2dab"));
+ Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));
executeTest(String.format("test calling with BAQ"), spec);
}
@@ -265,7 +265,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
- Arrays.asList("fa4f3ee67d98b64102a8a3ec81a3bc81"));
+ Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
}
@@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
- Arrays.asList("df90890e43d735573a3b3e4f289ca46b"));
+ Arrays.asList("36ce53ae4319718ad9c8ae391deebc8c"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
- Arrays.asList("cff6dd0f4eb1ef0b6fc476da6ffead19"));
+ Arrays.asList("d356cbaf240d7025d1aecdabaff3a3e0"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
@@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
- Arrays.asList("1e2a4aab26e9ab0dae709d33a669e036"));
+ Arrays.asList("fcd590a55f5fec2a9b7e628187d6b8a8"));
executeTest("test MultiSample Phase1 indels with complicated records", spec4);
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
index 65de6697b..b53daaf39 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java
@@ -34,8 +34,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "ab4940a16ab990181bd8368c76b23853" );
new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941");
- new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "36c0c467b6245c2c6c4e9c956443a154" );
- new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "955a8fa2ddb2b04c406766ccd9ac45cc" );
+ new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "714e65d6cb51ae32221a77ce84cbbcdc" );
+ new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "932f0063abb2a23c22ec992ef8d36aa5" );
return CCTest.getTests(CCTest.class);
}
@@ -91,8 +91,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
public Object[][] createTRTestData() {
new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0b7123ae9f4155484b68e4a4f96c5504" );
new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "d04cf1f6df486e45226ebfbf93a188a5");
- new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b2f4757bc47cf23bd9a09f756c250787" );
- new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "502c7df4d4923c4d078b014bf78bed34" );
+ new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "74314e5562c1a65547bb0edaacffe602" );
+ new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "41c2f82f7789421f3690ed3c35b8f2e4" );
return TRTest.getTests(TRTest.class);
}
@@ -291,7 +291,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesNoIndex() {
HashMap e = new HashMap();
- e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "828d247c6e8ef5ebdf3603dc0ce79f61" );
+ e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "aac7df368ca589dc0a66d5bd9ad007e3" );
for ( Map.Entry entry : e.entrySet() ) {
String bam = entry.getKey();
@@ -317,7 +317,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test(dependsOnMethods = "testCountCovariatesNoIndex")
public void testTableRecalibratorNoIndex() {
HashMap e = new HashMap();
- e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "991f093a0e610df235d28ada418ebf33" );
+ e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "02249d9933481052df75c58a2a1a8e63" );
for ( Map.Entry entry : e.entrySet() ) {
String bam = entry.getKey();
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
index b3555b145..5c3a43c96 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@@ -450,4 +450,21 @@ public class VariantEvalIntegrationTest extends WalkerTest {
);
executeTest("testIntervalStrat", spec);
}
+
+ @Test
+ public void testModernVCFWithLargeIndels() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ buildCommandLine(
+ "-T VariantEval",
+ "-R " + b37KGReference,
+ "-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf",
+ "-L 20",
+ "-D " + b37dbSNP132,
+ "-o %s"
+ ),
+ 1,
+ Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
+ );
+ executeTest("testModernVCFWithLargeIndels", spec);
+ }
}
diff --git a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java
index 3fb330853..a8364419d 100644
--- a/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/interval/IntervalIntegrationTest.java
@@ -83,6 +83,28 @@ public class IntervalIntegrationTest extends WalkerTest {
executeTest("testUnmappedReadInclusion",spec);
}
+ @Test
+ public void testMixedMappedAndUnmapped() {
+ WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+ "-T PrintReads" +
+ " -I " + validationDataLocation + "MV1994.bam" +
+ " -R " + validationDataLocation + "Escherichia_coli_K12_MG1655.fasta" +
+ " -L Escherichia_coli_K12:4630000-4639675" +
+ " -L unmapped" +
+ " -U",
+ 0, // two output files
+ Collections.emptyList());
+
+ // our base file
+ File baseOutputFile = createTempFile("testMixedMappedAndUnmapped",".bam");
+ spec.setOutputFileLocation(baseOutputFile);
+ spec.addAuxFile("083ef1e9ded868e0d12c05a1354c0319",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
+ spec.addAuxFile("fa90ff91ac0cc689c71a3460a3530b8b", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
+
+ executeTest("testMixedMappedAndUnmapped",spec);
+ }
+
+
@Test(enabled = false)
public void testUnmappedReadExclusion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java
index b9f831028..367f6294d 100755
--- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java
@@ -103,10 +103,31 @@ public class ReadUtilsUnitTest extends BaseTest {
read.setReadNegativeStrandFlag(false);
boundary = ReadUtils.getAdaptorBoundary(read);
Assert.assertNull(boundary);
+ read.setInferredInsertSize(10);
// Test case 6: read is unmapped
read.setReadUnmappedFlag(true);
boundary = ReadUtils.getAdaptorBoundary(read);
Assert.assertNull(boundary);
+ read.setReadUnmappedFlag(false);
+
+ // Test case 7: reads don't overlap and look like this:
+ // <--------|
+ // |------>
+ // first read:
+ myStart = 980;
+ read.setAlignmentStart(myStart);
+ read.setInferredInsertSize(20);
+ read.setReadNegativeStrandFlag(true);
+ boundary = ReadUtils.getAdaptorBoundary(read);
+ Assert.assertNull(boundary);
+
+ // second read:
+ myStart = 1000;
+ read.setAlignmentStart(myStart);
+ read.setMateAlignmentStart(980);
+ read.setReadNegativeStrandFlag(false);
+ boundary = ReadUtils.getAdaptorBoundary(read);
+ Assert.assertNull(boundary);
}
}
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
index 621afe817..e26541e98 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
@@ -29,14 +29,14 @@ class DataProcessingPipeline extends QScript {
var reference: File = _
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
- var dbSNP: List[File] = List()
+ var dbSNP: Seq[File] = Seq()
/****************************************************************************
* Optional Parameters
****************************************************************************/
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
- var indels: List[File] = List()
+ var indels: Seq[File] = Seq()
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
var bwaPath: File = _
@@ -118,13 +118,13 @@ class DataProcessingPipeline extends QScript {
// Because the realignment only happens after these scripts are executed, in case you are using
// bwa realignment, this function will operate over the original bam files and output over the
// (to be realigned) bam files.
- def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, List[File]] = {
+ def createSampleFiles(bamFiles: Seq[File], realignedBamFiles: Seq[File]): Map[String, Seq[File]] = {
// Creating a table with SAMPLE information from each input BAM file
- val sampleTable = scala.collection.mutable.Map.empty[String, List[File]]
+ val sampleTable = scala.collection.mutable.Map.empty[String, Seq[File]]
val realignedIterator = realignedBamFiles.iterator
for (bam <- bamFiles) {
- val rBam = realignedIterator.next // advance to next element in the realignedBam list so they're in sync.
+ val rBam = realignedIterator.next() // advance to next element in the realignedBam list so they're in sync.
val samReader = new SAMFileReader(bam)
val header = samReader.getFileHeader
@@ -138,12 +138,12 @@ class DataProcessingPipeline extends QScript {
for (rg <- readGroups) {
val sample = rg.getSample
if (!sampleTable.contains(sample))
- sampleTable(sample) = List(rBam)
+ sampleTable(sample) = Seq(rBam)
else if ( !sampleTable(sample).contains(rBam))
sampleTable(sample) :+= rBam
}
}
- return sampleTable.toMap
+ sampleTable.toMap
}
// Rebuilds the Read Group string to give BWA
@@ -161,8 +161,8 @@ class DataProcessingPipeline extends QScript {
// Takes a list of processed BAM files and realign them using the BWA option requested (bwase or bwape).
// Returns a list of realigned BAM files.
- def performAlignment(bams: List[File]): List[File] = {
- var realignedBams: List[File] = List()
+ def performAlignment(bams: Seq[File]): Seq[File] = {
+ var realignedBams: Seq[File] = Seq()
var index = 1
for (bam <- bams) {
// first revert the BAM file to the original qualities
@@ -194,10 +194,10 @@ class DataProcessingPipeline extends QScript {
realignedBams :+= rgRealignedBamFile
index = index + 1
}
- return realignedBams
+ realignedBams
}
- def getIndelCleaningModel(): ConsensusDeterminationModel = {
+ def getIndelCleaningModel: ConsensusDeterminationModel = {
if (cleaningModel == "KNOWNS_ONLY")
ConsensusDeterminationModel.KNOWNS_ONLY
else if (cleaningModel == "USE_SW")
@@ -206,17 +206,17 @@ class DataProcessingPipeline extends QScript {
ConsensusDeterminationModel.USE_READS
}
- def revertBams(bams: List[File], removeAlignmentInformation: Boolean): List[File] = {
- var revertedBAMList: List[File] = List()
+ def revertBams(bams: Seq[File], removeAlignmentInformation: Boolean): Seq[File] = {
+ var revertedBAMList: Seq[File] = Seq()
for (bam <- bams)
revertedBAMList :+= revertBAM(bam, removeAlignmentInformation)
- return revertedBAMList
+ revertedBAMList
}
def revertBAM(bam: File, removeAlignmentInformation: Boolean): File = {
val revertedBAM = swapExt(bam, ".bam", ".reverted.bam")
add(revert(bam, revertedBAM, removeAlignmentInformation))
- return revertedBAM
+ revertedBAM
}
/****************************************************************************
@@ -224,22 +224,22 @@ class DataProcessingPipeline extends QScript {
****************************************************************************/
- def script = {
+ def script() {
// final output list of processed bam files
- var cohortList: List[File] = List()
+ var cohortList: Seq[File] = Seq()
// sets the model for the Indel Realigner
- cleanModelEnum = getIndelCleaningModel()
+ cleanModelEnum = getIndelCleaningModel
// keep a record of the number of contigs in the first bam file in the list
- val bams = QScriptUtils.createListFromFile(input)
+ val bams = QScriptUtils.createSeqFromFile(input)
if (nContigs < 0)
nContigs = QScriptUtils.getNumberOfContigs(bams(0))
val realignedBAMs = if (useBWApe || useBWAse || useBWAsw) {performAlignment(bams)} else {revertBams(bams, false)}
// generate a BAM file per sample joining all per lane files if necessary
- val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs)
+ val sampleBAMFiles: Map[String, Seq[File]] = createSampleFiles(bams, realignedBAMs)
// if this is a 'knowns only' indel realignment run, do it only once for all samples.
val globalIntervals = new File(outputDir + projectName + ".intervals")
@@ -317,7 +317,7 @@ class DataProcessingPipeline extends QScript {
this.maxRecordsInRam = 100000
}
- case class target (inBams: List[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
+ case class target (inBams: Seq[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY)
this.input_file = inBams
this.out = outIntervals
@@ -330,7 +330,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outIntervals + ".target"
}
- case class clean (inBams: List[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
+ case class clean (inBams: Seq[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
this.input_file = inBams
this.targetIntervals = tIntervals
this.out = outBam
@@ -347,11 +347,11 @@ class DataProcessingPipeline extends QScript {
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
this.knownSites ++= qscript.dbSNP
- this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
+ this.covariate ++= Seq("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
this.input_file :+= inBam
this.recal_file = outRecalFile
if (!defaultPlatform.isEmpty) this.default_platform = defaultPlatform
- if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
+ if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
this.scatterCount = nContigs
this.analysisName = queueLogDir + outRecalFile + ".covariates"
@@ -363,7 +363,7 @@ class DataProcessingPipeline extends QScript {
this.recal_file = inRecalFile
this.baq = CalculationMode.CALCULATE_AS_NECESSARY
this.out = outBam
- if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
+ if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
this.no_pg_tag = qscript.testMode
this.scatterCount = nContigs
@@ -395,7 +395,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".dedup"
}
- case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
+ case class joinBams (inBams: Seq[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
this.input = inBams
this.output = outBam
this.analysisName = queueLogDir + outBam + ".joinBams"
@@ -495,7 +495,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".bwasw"
}
- case class writeList(inBams: List[File], outBamList: File) extends ListWriterFunction {
+ case class writeList(inBams: Seq[File], outBamList: File) extends ListWriterFunction {
this.inputFiles = inBams
this.listFile = outBamList
this.analysisName = queueLogDir + outBamList + ".bamList"
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala
index c06601a2d..2f0715ae9 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala
@@ -46,20 +46,24 @@ class MethodsDevelopmentCallingPipeline extends QScript {
val bamList: File,
val goldStandard_VCF: File,
val intervals: String,
- val titvTarget: Double,
- val trancheTarget: Double,
+ val indelTranchTarget: Double,
+ val snpTrancheTarget: Double,
val isLowpass: Boolean,
val isExome: Boolean,
val nSamples: Int) {
val name = qscript.outputDir + baseName
val clusterFile = new File(name + ".clusters")
- val rawVCF = new File(name + ".raw.vcf")
+ val rawSnpVCF = new File(name + ".raw.vcf")
val rawIndelVCF = new File(name + ".raw.indel.vcf")
val filteredIndelVCF = new File(name + ".filtered.indel.vcf")
- val recalibratedVCF = new File(name + ".recalibrated.vcf")
- val tranchesFile = new File(name + ".tranches")
- val vqsrRscript = name + ".vqsr.r"
- val recalFile = new File(name + ".tranches.recal")
+ val recalibratedSnpVCF = new File(name + ".snp.recalibrated.vcf")
+ val recalibratedIndelVCF = new File(name + ".indel.recalibrated.vcf")
+ val tranchesSnpFile = new File(name + ".snp.tranches")
+ val tranchesIndelFile = new File(name + ".indel.tranches")
+ val vqsrSnpRscript = name + ".snp.vqsr.r"
+ val vqsrIndelRscript = name + ".indel.vqsr.r"
+ val recalSnpFile = new File(name + ".snp.tranches.recal")
+ val recalIndelFile = new File(name + ".indel.tranches.recal")
val goldStandardRecalibratedVCF = new File(name + "goldStandard.recalibrated.vcf")
val goldStandardTranchesFile = new File(name + "goldStandard.tranches")
val goldStandardRecalFile = new File(name + "goldStandard.tranches.recal")
@@ -88,6 +92,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
val training_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.highQuality.vcf"
val badSites_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.terrible.vcf"
val projectConsensus_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/ALL.wgs.projectConsensus_v2b.20101123.snps.sites.vcf"
+ val indelGoldStandardCallset = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf"
val lowPass: Boolean = true
val exome: Boolean = true
@@ -101,69 +106,69 @@ class MethodsDevelopmentCallingPipeline extends QScript {
"NA12878_gold" -> new Target("NA12878.goldStandard", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/data/goldStandard.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** There is no gold standard for the gold standard **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, lowPass, !exome, 391),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, lowPass, !exome, 391),
"NA12878_wgs_b37" -> new Target("NA12878.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"),
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"NA12878_wgs_decoy" -> new Target("NA12878.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.NA12878.clean.dedup.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"NA12878_wgs_hg18" -> new Target("NA12878.HiSeq.WGS.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask",
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam"),
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf"),
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 2.14, 99.0, !lowPass, !exome, 1),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"NA12878_wex_b37" -> new Target("NA12878.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/seq/picard_aggregation/C339/NA12878/v3/NA12878.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
"NA12878_wex_hg18" -> new Target("NA12878.HiSeq.WEx.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.indels.10.mask",
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.WEx.cleaned.recal.bam"),
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.vcf"),
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
"NA12878_wex_decoy" -> new Target("NA12878.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.NA12878.clean.dedup.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
"CEUTrio_wex_b37" -> new Target("CEUTrio.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.bwa.cleaned.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"),
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 3),
"CEUTrio_wgs_b37" -> new Target("CEUTrio.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 3),
"CEUTrio_wex_decoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 3),
"CEUTrio_wgs_decoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 3),
"GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.GA2.hg19.filtered.vcf"),
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"),
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, 99.0, lowPass, !exome, 79),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 90.0, 99.0, lowPass, !exome, 79),
"TGPWExGdA" -> new Target("1000G.WEx.GdA", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/Barcoded_1000G_WEx_Reduced_Plate_1.cleaned.list"), // BUGBUG: reduce from 60 to 20 people
new File("/humgen/gsa-scr1/delangel/NewUG/calls/AugustRelease.filtered_Q50_QD5.0_SB0.0.allSamples.SNPs_hg19.WEx_UG_newUG_MQC.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 2.6, 99.0, !lowPass, exome, 96),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 99.0, !lowPass, exome, 96),
"LowPassN60" -> new Target("lowpass.N60", b36, dbSNP_b36, hapmap_b36, indelMask_b36,
new File("/humgen/1kg/analysis/bamsForDataProcessingPapers/lowpass_b36/lowpass.chr20.cleaned.matefixed.bam"), // the bam list to call from
new File("/home/radon01/depristo/work/oneOffProjects/VQSRCutByNRS/lowpass.N60.chr20.filtered.vcf"), // the gold standard VCF file to run through the VQSR
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.b36.intervals", 2.3, 99.0, lowPass, !exome, 60), // chunked interval list to use with Queue's scatter/gather functionality
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.b36.intervals", 90.0, 99.0, lowPass, !exome, 60), // chunked interval list to use with Queue's scatter/gather functionality
"LowPassEUR363Nov" -> new Target("EUR.nov2010", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/EUR.363sample.Nov2010.chr20.bam"),
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, 99.0, lowPass, !exome, 363)
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 90.0, 99.0, lowPass, !exome, 363)
)
@@ -181,15 +186,15 @@ class MethodsDevelopmentCallingPipeline extends QScript {
val goldStandard = true
for (target <- targets) {
if( !skipCalling ) {
- if (!noIndels) add(new indelCall(target), new indelFilter(target), new indelEvaluation(target))
+ if (!noIndels) add(new indelCall(target), new indelRecal(target), new indelCut(target), new indelEvaluation(target))
add(new snpCall(target))
- add(new VQSR(target, !goldStandard))
- add(new applyVQSR(target, !goldStandard))
+ add(new snpRecal(target, !goldStandard))
+ add(new snpCut(target, !goldStandard))
add(new snpEvaluation(target))
}
if ( runGoldStandard ) {
- add(new VQSR(target, goldStandard))
- add(new applyVQSR(target, goldStandard))
+ add(new snpRecal(target, goldStandard))
+ add(new snpCut(target, goldStandard))
}
}
}
@@ -222,7 +227,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
this.min_base_quality_score = minimumBaseQuality
if (qscript.deletions >= 0)
this.max_deletion_fraction = qscript.deletions
- this.out = t.rawVCF
+ this.out = t.rawSnpVCF
this.glm = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
this.baq = if (noBAQ || t.isExome) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY}
this.analysisName = t.name + "_UGs"
@@ -257,79 +262,115 @@ class MethodsDevelopmentCallingPipeline extends QScript {
this.jobName = queueLogDir + t.name + ".indelfilter"
}
- // 3.) Variant Quality Score Recalibration - Generate Recalibration table
- class VQSR(t: Target, goldStandard: Boolean) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
+ class VQSRBase(t: Target) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
this.nt = 2
this.reference_sequence = t.reference
this.intervalsString ++= List(t.intervals)
- this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
- this.resource :+= new TaggedFile( t.hapmapFile, "training=true,truth=true,prior=15.0" )
- this.resource :+= new TaggedFile( omni_b37, "training=true,truth=true,prior=12.0" )
- this.resource :+= new TaggedFile( training_1000G, "training=true,prior=10.0" )
+ this.allPoly = true
+ this.tranche ++= List("100.0", "99.9", "99.5", "99.3", "99.0", "98.9", "98.8", "98.5", "98.4", "98.3", "98.2", "98.1", "98.0", "97.9", "97.8", "97.5", "97.0", "95.0", "90.0")
+ }
+
+ class snpRecal(t: Target, goldStandard: Boolean) extends VQSRBase(t) with UNIVERSAL_GATK_ARGS {
+ this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawSnpVCF } )
+ this.resource :+= new TaggedFile( t.hapmapFile, "known=false,training=true,truth=true,prior=15.0" )
+ this.resource :+= new TaggedFile( omni_b37, "known=false,training=true,truth=true,prior=12.0" )
+ this.resource :+= new TaggedFile( training_1000G, "known=false,training=true,prior=10.0" )
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
- if(t.nSamples >= 10) { // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
- this.use_annotation ++= List("InbreedingCoeff")
- }
- if(!t.isExome) {
+ if(t.nSamples >= 10)
+ this.use_annotation ++= List("InbreedingCoeff") // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
+ if(!t.isExome)
this.use_annotation ++= List("DP")
- } else { // exome specific parameters
+ else { // exome specific parameters
this.resource :+= new TaggedFile( badSites_1000G, "bad=true,prior=2.0" )
this.mG = 6
- if(t.nSamples <= 3) { // very few exome samples means very few variants
+ if(t.nSamples <= 3) { // very few exome samples means very few variants
this.mG = 4
this.percentBad = 0.04
}
}
- this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile }
- this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
- this.allPoly = true
- this.tranche ++= List("100.0", "99.9", "99.5", "99.3", "99.0", "98.9", "98.8", "98.5", "98.4", "98.3", "98.2", "98.1", "98.0", "97.9", "97.8", "97.5", "97.0", "95.0", "90.0")
- this.rscript_file = t.vqsrRscript
- this.analysisName = t.name + "_VQSR"
- this.jobName = queueLogDir + t.name + ".VQSR"
+ this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesSnpFile }
+ this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalSnpFile }
+ this.rscript_file = t.vqsrSnpRscript
+ this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
+ this.analysisName = t.name + "_VQSRs"
+ this.jobName = queueLogDir + t.name + ".snprecal"
}
+ class indelRecal(t: Target) extends VQSRBase(t) with UNIVERSAL_GATK_ARGS {
+ this.input :+= t.rawIndelVCF
+ this.resource :+= new TaggedFile(indelGoldStandardCallset, "known=false,training=true,truth=true,prior=12.0" )
+ this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
+ this.use_annotation ++= List("QD", "HaplotypeScore", "ReadPosRankSum", "FS")
+ if(t.nSamples >= 10)
+ this.use_annotation ++= List("InbreedingCoeff") // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
+ this.tranches_file = t.tranchesIndelFile
+ this.recal_file = t.recalIndelFile
+ this.rscript_file = t.vqsrIndelRscript
+ this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
+ this.analysisName = t.name + "_VQSRi"
+ this.jobName = queueLogDir + t.name + ".indelrecal"
+ }
+
+
// 4.) Apply the recalibration table to the appropriate tranches
- class applyVQSR (t: Target, goldStandard: Boolean) extends ApplyRecalibration with UNIVERSAL_GATK_ARGS {
+ class applyVQSRBase (t: Target) extends ApplyRecalibration with UNIVERSAL_GATK_ARGS {
this.memoryLimit = 6
this.reference_sequence = t.reference
this.intervalsString ++= List(t.intervals)
- this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
- this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile}
- this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
- this.ts_filter_level = t.trancheTarget
- this.out = t.recalibratedVCF
- this.analysisName = t.name + "_AVQSR"
- this.jobName = queueLogDir + t.name + ".applyVQSR"
}
+ class snpCut (t: Target, goldStandard: Boolean) extends applyVQSRBase(t) {
+ this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawSnpVCF } )
+ this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesSnpFile}
+ this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalSnpFile }
+ this.ts_filter_level = t.snpTrancheTarget
+ this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
+ this.out = t.recalibratedSnpVCF
+ this.analysisName = t.name + "_AVQSRs"
+ this.jobName = queueLogDir + t.name + ".snpcut"
+ }
+
+ class indelCut (t: Target) extends applyVQSRBase(t) {
+ this.input :+= t.rawIndelVCF
+ this.tranches_file = t.tranchesIndelFile
+ this.recal_file = t.recalIndelFile
+ this.ts_filter_level = t.indelTranchTarget
+ this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
+ this.out = t.recalibratedIndelVCF
+ this.analysisName = t.name + "_AVQSRi"
+ this.jobName = queueLogDir + t.name + ".indelcut"
+ }
+
+
// 5.) Variant Evaluation Base(OPTIONAL)
class EvalBase(t: Target) extends VariantEval with UNIVERSAL_GATK_ARGS {
this.memoryLimit = 3
- this.reference_sequence = t.reference
this.comp :+= new TaggedFile(t.hapmapFile, "hapmap" )
- this.intervalsString ++= List(t.intervals)
this.D = new File(t.dbsnpFile)
+ this.reference_sequence = t.reference
+ this.intervalsString ++= List(t.intervals)
this.sample = samples
}
// 5a.) SNP Evaluation (OPTIONAL) based on the cut vcf
class snpEvaluation(t: Target) extends EvalBase(t) {
if (t.reference == b37 || t.reference == hg19) this.comp :+= new TaggedFile( omni_b37, "omni" )
- this.eval :+= t.recalibratedVCF
+ this.eval :+= t.recalibratedSnpVCF
this.out = t.evalFile
this.analysisName = t.name + "_VEs"
- this.jobName = queueLogDir + t.name + ".snp.eval"
+ this.jobName = queueLogDir + t.name + ".snpeval"
}
// 5b.) Indel Evaluation (OPTIONAL)
class indelEvaluation(t: Target) extends EvalBase(t) {
- this.eval :+= t.filteredIndelVCF
- this.evalModule :+= "IndelStatistics"
+ this.eval :+= t.recalibratedIndelVCF
+ this.comp :+= new TaggedFile(indelGoldStandardCallset, "indelGS" )
+ this.noEV = true
+ this.evalModule = List("CompOverlap", "CountVariants", "TiTvVariantEvaluator", "ValidationReport", "IndelStatistics")
this.out = t.evalIndelFile
this.analysisName = t.name + "_VEi"
- this.jobName = queueLogDir + queueLogDir + t.name + ".indel.eval"
+        this.jobName = queueLogDir + t.name + ".indeleval"
}
}
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
index 4896eaed3..2f954713e 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
@@ -53,9 +53,9 @@ class PacbioProcessingPipeline extends QScript {
val queueLogDir: String = ".qlog/"
- def script = {
+ def script() {
- val fileList: List[File] = QScriptUtils.createListFromFile(input)
+ val fileList: Seq[File] = QScriptUtils.createSeqFromFile(input)
for (file: File <- fileList) {
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
index 32913deb4..7a22e700b 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.io.IOUtils
import org.broadinstitute.sting.utils.help.ApplicationDetails
import java.util.{ResourceBundle, Arrays}
import org.broadinstitute.sting.utils.text.TextFormattingUtils
+import org.apache.commons.io.FilenameUtils
/**
* Entry point of Queue. Compiles and runs QScripts passed in to the command line.
@@ -61,6 +62,7 @@ object QCommandLine extends Logging {
CommandLineProgram.start(qCommandLine, argv)
try {
Runtime.getRuntime.removeShutdownHook(shutdownHook)
+ qCommandLine.shutdown()
} catch {
case _ => /* ignore, example 'java.lang.IllegalStateException: Shutdown in progress' */
}
@@ -78,10 +80,10 @@ object QCommandLine extends Logging {
class QCommandLine extends CommandLineProgram with Logging {
@Input(fullName="script", shortName="S", doc="QScript scala file", required=true)
@ClassType(classOf[File])
- private var scripts = List.empty[File]
+ var scripts = Seq.empty[File]
@ArgumentCollection
- private val settings = new QGraphSettings
+ val settings = new QGraphSettings
private val qScriptManager = new QScriptManager
private val qGraph = new QGraph
@@ -91,7 +93,7 @@ class QCommandLine extends CommandLineProgram with Logging {
private lazy val pluginManager = {
qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory)
qScriptManager.loadScripts(scripts, qScriptClasses)
- new PluginManager[QScript](classOf[QScript], List(qScriptClasses.toURI.toURL))
+ new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
}
QFunction.parsingEngine = new ParsingEngine(this)
@@ -101,12 +103,16 @@ class QCommandLine extends CommandLineProgram with Logging {
* functions, and then builds and runs a QGraph based on the dependencies.
*/
def execute = {
+ if (settings.qSettings.runName == null)
+ settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
+
qGraph.settings = settings
val allQScripts = pluginManager.createAllTypes();
for (script <- allQScripts) {
logger.info("Scripting " + pluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
loadArgumentsIntoObject(script)
+ script.qSettings = settings.qSettings
try {
script.script()
} catch {
@@ -120,22 +126,34 @@ class QCommandLine extends CommandLineProgram with Logging {
// Execute the job graph
qGraph.run()
+ val functionsAndStatus = qGraph.getFunctionsAndStatus
+ val success = qGraph.success
+
// walk over each script, calling onExecutionDone
for (script <- allQScripts) {
- script.onExecutionDone(qGraph.getFunctionsAndStatus(script.functions), qGraph.success)
- if ( ! settings.disableJobReport ) {
- val jobStringName = (QScriptUtils.?(settings.jobReportFile)).getOrElse(settings.qSettings.jobNamePrefix + ".jobreport.txt")
+ val scriptFunctions = functionsAndStatus.filterKeys(f => script.functions.contains(f))
+ script.onExecutionDone(scriptFunctions, success)
+ }
- if (!shuttingDown) {
- val reportFile = new File(jobStringName)
- logger.info("Writing JobLogging GATKReport to file " + reportFile)
- QJobReport.printReport(qGraph.getFunctionsAndStatus(script.functions), reportFile)
+ logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", functionsAndStatus.size))
- if ( settings.run ) {
- val pdfFile = new File(jobStringName + ".pdf")
- logger.info("Plotting JobLogging GATKReport to file " + pdfFile)
- QJobReport.plotReport(reportFile, pdfFile)
- }
+ if (!settings.disableJobReport) {
+ val jobStringName = {
+ if (settings.jobReportFile != null)
+ settings.jobReportFile
+ else
+ settings.qSettings.runName + ".jobreport.txt"
+ }
+
+ if (!shuttingDown) {
+ val reportFile = IOUtils.absolute(settings.qSettings.runDirectory, jobStringName)
+ logger.info("Writing JobLogging GATKReport to file " + reportFile)
+ QJobReport.printReport(functionsAndStatus, reportFile)
+
+ if (settings.run) {
+ val pdfFile = IOUtils.absolute(settings.qSettings.runDirectory, FilenameUtils.removeExtension(jobStringName) + ".pdf")
+ logger.info("Plotting JobLogging GATKReport to file " + pdfFile)
+ QJobReport.plotReport(reportFile, pdfFile)
}
}
}
@@ -179,20 +197,20 @@ class QCommandLine extends CommandLineProgram with Logging {
override def getApplicationDetails : ApplicationDetails = {
new ApplicationDetails(createQueueHeader(),
- List.empty[String],
+ Seq.empty[String],
ApplicationDetails.createDefaultRunningInstructions(getClass.asInstanceOf[Class[CommandLineProgram]]),
"")
}
- private def createQueueHeader() : List[String] = {
- List(String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp),
- "Copyright (c) 2011 The Broad Institute",
+ private def createQueueHeader() : Seq[String] = {
+ Seq(String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp),
+ "Copyright (c) 2012 The Broad Institute",
"Please view our documentation at http://www.broadinstitute.org/gsa/wiki",
"For support, please view our support site at http://getsatisfaction.com/gsa")
}
private def getQueueVersion : String = {
- var stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
+ val stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
if ( stingResources.containsKey("org.broadinstitute.sting.queue.QueueVersion.version") ) {
stingResources.getString("org.broadinstitute.sting.queue.QueueVersion.version")
@@ -203,7 +221,7 @@ class QCommandLine extends CommandLineProgram with Logging {
}
private def getBuildTimestamp : String = {
- var stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
+ val stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
if ( stingResources.containsKey("build.timestamp") ) {
stingResources.getString("build.timestamp")
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
index fce65c997..6f887ea00 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
@@ -27,7 +27,6 @@ package org.broadinstitute.sting.queue
import engine.JobRunInfo
import org.broadinstitute.sting.queue.function.QFunction
import annotation.target.field
-import io.Source
import util.{StringFileConversions, PrimitiveOptionConversions, Logging}
/**
@@ -53,6 +52,11 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
type ArgumentCollection = org.broadinstitute.sting.commandline.ArgumentCollection @field
type Gather = org.broadinstitute.sting.commandline.Gather @field
+ /**
+ * Default settings for QFunctions
+ */
+ var qSettings: QSettings = _
+
/**
* Builds the CommandLineFunctions that will be used to run this script and adds them to this.functions directly or using the add() utility method.
*/
@@ -60,18 +64,14 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
/**
* A default handler for the onExecutionDone() function. By default this doesn't do anything
- * except print out a fine status message.
*/
def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) {
- logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", jobs.size))
- // this is too much output
- // for ( (f, info) <- jobs ) logger.info(" %s %s".format(f.jobName, info))
}
/**
* The command line functions that will be executed for this QScript.
*/
- var functions = List.empty[QFunction]
+ var functions = Seq.empty[QFunction]
/**
* Exchanges the extension on a file.
@@ -98,22 +98,20 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
* Adds one or more command line functions to be run.
* @param functions Functions to add.
*/
- def add(functions: QFunction*) = {
+ def add(functions: QFunction*) {
functions.foreach(function => function.addOrder = QScript.nextAddOrder)
this.functions ++= functions
}
- def addAll(functions: List[QFunction]) {
+ def addAll(functions: Seq[QFunction]) {
functions.foreach( f => add(f) )
}
-
- def extractFileEntries(in: File): List[File] = Source.fromFile(in).getLines().toList
}
object QScript {
private var addOrder = 0
private def nextAddOrder = {
addOrder += 1
- List(addOrder)
+ Seq(addOrder)
}
}
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala
index 512a9f8dd..74487917f 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala
@@ -20,7 +20,7 @@ class QScriptManager() extends Logging {
* Compiles and loads the scripts in the files into the current classloader.
* Heavily based on scala/src/compiler/scala/tools/ant/Scalac.scala
*/
- def loadScripts(scripts: List[File], tempDir: File) {
+ def loadScripts(scripts: Seq[File], tempDir: File) {
if (scripts.size > 0) {
val settings = new Settings((error: String) => logger.error(error))
settings.deprecation.value = true
@@ -36,7 +36,7 @@ class QScriptManager() extends Logging {
logger.info("Compiling %s QScript%s".format(scripts.size, plural(scripts.size)))
logger.debug("Compilation directory: " + settings.outdir.value)
- run.compileFiles(scripts.map(new PlainFile(_)))
+ run.compileFiles(scripts.toList.map(new PlainFile(_)))
reporter.printSummary()
if (reporter.hasErrors) {
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
index e8ac26a57..d9fed4ce8 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -25,15 +25,14 @@
package org.broadinstitute.sting.queue
import java.io.File
-import org.broadinstitute.sting.commandline.{ArgumentCollection, Argument}
-import org.broadinstitute.sting.queue.util.{SystemUtils, EmailSettings}
+import org.broadinstitute.sting.commandline.Argument
/**
* Default settings settable on the command line and passed to CommandLineFunctions.
*/
class QSettings {
- @Argument(fullName="job_name_prefix", shortName="jobPrefix", doc="Default name prefix for compute farm jobs.", required=false)
- var jobNamePrefix: String = QSettings.processNamePrefix
+ @Argument(fullName="run_name", shortName="runName", doc="A name for this run used for various status messages.", required=false)
+ var runName: String = _
@Argument(fullName="job_project", shortName="jobProject", doc="Default project for compute farm jobs.", required=false)
var jobProject: String = _
@@ -45,13 +44,13 @@ class QSettings {
var jobPriority: Option[Int] = None
@Argument(fullName="job_native_arg", shortName="jobNative", doc="Native arguments to pass to the job runner.", required=false)
- var jobNativeArgs: List[String] = Nil
+ var jobNativeArgs: Seq[String] = Nil
@Argument(fullName="job_resource_request", shortName="jobResReq", doc="Resource requests to pass to the job runner.", required=false)
- var jobResourceRequests: List[String] = Nil
+ var jobResourceRequests: Seq[String] = Nil
@Argument(fullName="job_environment_name", shortName="jobEnv", doc="Environment names for the job runner.", required=false)
- var jobEnvironmentNames: List[String] = Nil
+ var jobEnvironmentNames: Seq[String] = Nil
@Argument(fullName="memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false)
var memoryLimit: Option[Double] = None
@@ -77,15 +76,4 @@ class QSettings {
@Argument(fullName="job_scatter_gather_directory", shortName="jobSGDir", doc="Default directory to place scatter gather output for compute farm jobs.", required=false)
var jobScatterGatherDirectory: File = _
-
- @ArgumentCollection
- val emailSettings = new EmailSettings
-}
-
-/**
- * Default settings settable on the command line and passed to CommandLineFunctions.
- */
-object QSettings {
- /** A semi-unique job prefix using the host name and the process id. */
- private val processNamePrefix = "Q-" + SystemUtils.pidAtHost
}
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala
index 55ed94267..8225d28ab 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala
@@ -1,3 +1,27 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.queue.engine
import org.broadinstitute.sting.queue.function.QFunction
@@ -28,15 +52,18 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
val myRunInfo: JobRunInfo = JobRunInfo.default // purely for dryRun testing
+ /**
+ * When using reset status this variable tracks the old status
+ */
+ var resetFromStatus: RunnerStatus.Value = null
+
/**
* Initializes with the current status of the function.
*/
private var currentStatus = {
- val isDone = function.isDone
- val isFail = function.isFail
- if (isFail.isDefined && isFail.get)
+ if (function.isFail)
RunnerStatus.FAILED
- else if (isDone.isDefined && isDone.get)
+ else if (function.isDone)
RunnerStatus.DONE
else
RunnerStatus.PENDING
@@ -136,13 +163,15 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
* Resets the edge to pending status.
*/
def resetToPending(cleanOutputs: Boolean) {
+ if (resetFromStatus == null)
+ resetFromStatus = currentStatus
currentStatus = RunnerStatus.PENDING
if (cleanOutputs)
function.deleteOutputs()
runner = null
}
- override def dotString = function.dotString
+ override def shortDescription = function.shortDescription
/**
* Returns the path to the file to use for logging errors.
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala
index d006cde4b..be5622360 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala
@@ -3,7 +3,8 @@ package org.broadinstitute.sting.queue.engine
import org.broadinstitute.sting.queue.function.InProcessFunction
import java.util.Date
import org.broadinstitute.sting.utils.Utils
-import org.apache.commons.io.FileUtils
+import org.apache.commons.io.{IOUtils, FileUtils}
+import java.io.PrintStream
/**
* Runs a function that executes in process and does not fork out an external process.
@@ -16,12 +17,24 @@ class InProcessRunner(val function: InProcessFunction) extends JobRunner[InProce
getRunInfo.exechosts = Utils.resolveHostname()
runStatus = RunnerStatus.RUNNING
- function.run()
+ function.jobOutputStream = new PrintStream(FileUtils.openOutputStream(function.jobOutputFile))
+ function.jobErrorStream = {
+ if (function.jobErrorFile != null)
+ new PrintStream(FileUtils.openOutputStream(function.jobErrorFile))
+ else
+ function.jobOutputStream
+ }
+ try {
+ function.run()
+ function.jobOutputStream.println("%s%nDone.".format(function.description))
+ } finally {
+ IOUtils.closeQuietly(function.jobOutputStream)
+ if (function.jobErrorFile != null)
+ IOUtils.closeQuietly(function.jobErrorStream)
+ }
- getRunInfo.doneTime = new Date()
- val content = "%s%nDone.".format(function.description)
- FileUtils.writeStringToFile(function.jobOutputFile, content)
runStatus = RunnerStatus.DONE
+ getRunInfo.doneTime = new Date()
}
def status = runStatus
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/MappingEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/MappingEdge.scala
index 1d56009f3..17f0561fa 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/MappingEdge.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/MappingEdge.scala
@@ -1,3 +1,27 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.queue.engine
/**
@@ -10,5 +34,5 @@ class MappingEdge(val inputs: QNode, val outputs: QNode) extends QEdge {
* @return