diff --git a/build.xml b/build.xml
index 7c81c1f20..dbdafa3d9 100644
--- a/build.xml
+++ b/build.xml
@@ -1,5 +1,5 @@
-
+
+
+
+
+
+
+
+
diff --git a/ivy.xml b/ivy.xml
index 4f41904ba..f5ff15c30 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -1,3 +1,26 @@
+
@@ -21,7 +44,6 @@
-
@@ -40,7 +62,7 @@
-
+
diff --git a/public/java/src/org/broadinstitute/sting/commandline/Gather.java b/public/java/src/org/broadinstitute/sting/commandline/Gather.java
index 59c3f50cb..d452f708e 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/Gather.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/Gather.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -34,5 +34,6 @@ import java.lang.annotation.*;
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.FIELD})
public @interface Gather {
- Class value();
+ Class value() default Gather.class;
+ boolean enabled() default true;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index f954d7650..ede8e9340 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -443,7 +443,7 @@ public class GenomeAnalysisEngine {
if(!readsDataSource.hasIndex() && intervals != null && !argCollection.allowIntervalsWithUnindexedBAM)
throw new UserException.CommandLineException("Cannot perform interval processing when reads are present but no index is available.");
- if(walker instanceof LocusWalker) {
+ if(walker instanceof LocusWalker || walker instanceof ActiveRegionWalker) {
if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
if(intervals == null)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java
index e377f865d..a95eb7b8e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BlockInputStream.java
@@ -31,13 +31,13 @@ import net.sf.samtools.util.BlockCompressedFilePointerUtil;
import net.sf.samtools.util.BlockCompressedInputStream;
import net.sf.samtools.util.RuntimeEOFException;
import net.sf.samtools.util.SeekableStream;
-import org.broad.tribble.util.BlockCompressedStreamConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
+import java.util.Iterator;
import java.util.LinkedList;
/**
@@ -120,6 +120,12 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
// TODO: Kill the region when all we want to do is start at the beginning of the stream and run to the end of the stream.
this.position = new SAMReaderPosition(reader,this,new GATKBAMFileSpan(new GATKChunk(0,Long.MAX_VALUE)));
+ // The block offsets / block positions guarantee that the ending offset/position in the data structure maps to
+ // the point in the file just following the last read. These two arrays should never be empty; initializing
+ // to 0 to match the position above.
+ this.blockOffsets.add(0);
+ this.blockPositions.add(0L);
+
try {
if(validate) {
System.out.printf("BlockInputStream %s: BGZF block validation mode activated%n",this);
@@ -143,34 +149,52 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
public long getFilePointer() {
long filePointer;
synchronized(lock) {
- if(buffer.remaining() > 0) {
- // If there's data in the buffer, figure out from whence it came.
- final long blockAddress = blockPositions.size() > 0 ? blockPositions.get(0) : 0;
- final int blockOffset = buffer.position();
- filePointer = blockAddress << 16 | blockOffset;
- }
- else {
- // Otherwise, find the next position to load.
- filePointer = position.getBlockAddress() << 16;
- }
+ // Find the current block within the input stream.
+ int blockIndex;
+ for(blockIndex = 0; blockIndex+1 < blockOffsets.size() && buffer.position() >= blockOffsets.get(blockIndex + 1); blockIndex++)
+ ;
+ filePointer = blockPositions.get(blockIndex) + (buffer.position()-blockOffsets.get(blockIndex));
}
- if(validatingInputStream != null && filePointer != validatingInputStream.getFilePointer())
- throw new ReviewedStingException(String.format("Position of input stream is invalid; expected (block address, block offset) = (%d,%d), got (%d,%d)",
- BlockCompressedFilePointerUtil.getBlockAddress(filePointer),BlockCompressedFilePointerUtil.getBlockOffset(filePointer),
- BlockCompressedFilePointerUtil.getBlockAddress(validatingInputStream.getFilePointer()),BlockCompressedFilePointerUtil.getBlockOffset(validatingInputStream.getFilePointer())));
+// if(validatingInputStream != null && filePointer != validatingInputStream.getFilePointer())
+// throw new ReviewedStingException(String.format("Position of input stream is invalid; expected (block address, block offset) = (%d,%d), got (%d,%d)",
+// BlockCompressedFilePointerUtil.getBlockAddress(validatingInputStream.getFilePointer()),BlockCompressedFilePointerUtil.getBlockOffset(validatingInputStream.getFilePointer()),
+// BlockCompressedFilePointerUtil.getBlockAddress(filePointer),BlockCompressedFilePointerUtil.getBlockOffset(filePointer)));
return filePointer;
}
public void seek(long target) {
- // TODO: Validate the seek point.
//System.out.printf("Thread %s, BlockInputStream %s: seeking to block %d, offset %d%n",Thread.currentThread().getId(),this,BlockCompressedFilePointerUtil.getBlockAddress(target),BlockCompressedFilePointerUtil.getBlockOffset(target));
synchronized(lock) {
clearBuffers();
- position.advancePosition(BlockCompressedFilePointerUtil.getBlockAddress(target));
- waitForBufferFill();
- buffer.position(BlockCompressedFilePointerUtil.getBlockOffset(target));
+
+ // Ensure that the position filled in by submitAccessPlan() is in sync with the seek target just specified.
+ position.advancePosition(target);
+
+ // If the position advances past the end of the target, that must mean that we sought to a point at the end
+ // of one of the chunk list's subregions. Make a note of our current position and punt on loading any data.
+ if(target < position.getBlockAddress() << 16) {
+ blockOffsets.clear();
+ blockOffsets.add(0);
+ blockPositions.clear();
+ blockPositions.add(target);
+ }
+ else {
+ waitForBufferFill();
+ // A buffer fill will load the relevant data from the shard, but the buffer position still needs to be
+ // advanced as appropriate.
+ Iterator blockOffsetIterator = blockOffsets.descendingIterator();
+ Iterator blockPositionIterator = blockPositions.descendingIterator();
+ while(blockOffsetIterator.hasNext() && blockPositionIterator.hasNext()) {
+ final int blockOffset = blockOffsetIterator.next();
+ final long blockPosition = blockPositionIterator.next();
+ if((blockPosition >> 16) == (target >> 16) && (blockPosition&0xFFFF) < (target&0xFFFF)) {
+ buffer.position(blockOffset + (int)(target&0xFFFF)-(int)(blockPosition&0xFFFF));
+ break;
+ }
+ }
+ }
if(validatingInputStream != null) {
try {
@@ -191,14 +215,17 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
buffer.clear();
buffer.limit(0);
+ // Clear everything except the last block offset / position
blockOffsets.clear();
- blockPositions.clear();
+ blockOffsets.add(0);
+ while(blockPositions.size() > 1)
+ blockPositions.removeFirst();
}
public boolean eof() {
synchronized(lock) {
// TODO: Handle multiple empty BGZF blocks at end of the file.
- return position != null && position.getBlockAddress() >= length;
+ return position != null && (position.getBlockAddress() < 0 || position.getBlockAddress() >= length);
}
}
@@ -216,7 +243,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
// Assume that the access plan is going to tell us to start where we are and move forward.
// If this isn't the case, we'll soon receive a seek request and the buffer will be forced to reset.
if(this.position != null && position.getBlockAddress() < this.position.getBlockAddress())
- position.advancePosition(this.position.getBlockAddress());
+ position.advancePosition(this.position.getBlockAddress() << 16);
}
this.position = position;
}
@@ -225,14 +252,15 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
// Compact buffer to maximize storage space.
int bytesToRemove = 0;
- // Look ahead to see if we can compact away the first block in the series.
- while(blockOffsets.size() > 1 && buffer.position() < blockOffsets.get(1)) {
- bytesToRemove += blockOffsets.remove();
+ // Look ahead to see if we can compact away the first blocks in the series.
+ while(blockOffsets.size() > 1 && buffer.position() >= blockOffsets.get(1)) {
+ blockOffsets.remove();
blockPositions.remove();
+ bytesToRemove = blockOffsets.peek();
}
// If we end up with an empty block at the end of the series, compact this as well.
- if(buffer.remaining() == 0 && !blockOffsets.isEmpty() && buffer.position() >= blockOffsets.peek()) {
+ if(buffer.remaining() == 0 && blockOffsets.size() > 1 && buffer.position() >= blockOffsets.peek()) {
bytesToRemove += buffer.position();
blockOffsets.remove();
blockPositions.remove();
@@ -241,11 +269,17 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
int finalBufferStart = buffer.position() - bytesToRemove;
int finalBufferSize = buffer.remaining();
+ // Position the buffer to remove the unneeded data, and compact it away.
buffer.position(bytesToRemove);
buffer.compact();
+ // Reset the limits for reading.
buffer.position(finalBufferStart);
buffer.limit(finalBufferStart+finalBufferSize);
+
+ // Shift everything in the offset buffer down to accommodate the bytes removed from the buffer.
+ for(int i = 0; i < blockOffsets.size(); i++)
+ blockOffsets.set(i,blockOffsets.get(i)-bytesToRemove);
}
/**
@@ -253,6 +287,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
* MUST be called from a thread that is NOT the reader thread.
* @param incomingBuffer The data being pushed into this input stream.
* @param position target position for the data.
+ * @param filePosition the current position of the file pointer
*/
public void copyIntoBuffer(final ByteBuffer incomingBuffer, final SAMReaderPosition position, final long filePosition) {
synchronized(lock) {
@@ -262,7 +297,12 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
buffer.limit(buffer.capacity());
// Advance the position to take the most recent read into account.
- long lastReadPosition = position.getBlockAddress();
+ final long lastBlockAddress = position.getBlockAddress();
+ final int blockOffsetStart = position.getFirstOffsetInBlock();
+ final int blockOffsetEnd = position.getLastOffsetInBlock();
+
+ // Where did this read end? It either ended in the middle of a block (for a bounding chunk) or it ended at the start of the next block.
+ final long endOfRead = (blockOffsetEnd < incomingBuffer.remaining()) ? (lastBlockAddress << 16) | blockOffsetEnd : filePosition << 16;
byte[] validBytes = null;
if(validatingInputStream != null) {
@@ -277,7 +317,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
incomingBuffer.position(pos);
long currentFilePointer = validatingInputStream.getFilePointer();
- validatingInputStream.seek(lastReadPosition << 16);
+ validatingInputStream.seek(lastBlockAddress << 16);
validatingInputStream.read(validBytes);
validatingInputStream.seek(currentFilePointer);
@@ -286,7 +326,7 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
}
this.position = position;
- position.advancePosition(filePosition);
+ position.advancePosition(filePosition << 16);
if(buffer.remaining() < incomingBuffer.remaining()) {
//System.out.printf("Thread %s: waiting for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n",Thread.currentThread().getId(),buffer.remaining(),incomingBuffer.remaining());
@@ -294,12 +334,21 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
//System.out.printf("Thread %s: waited for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n", Thread.currentThread().getId(), buffer.remaining(), incomingBuffer.remaining());
}
- // Queue list of block offsets / block positions.
+ // Remove the last position in the list and add in the last read position, in case the two are different.
+ blockOffsets.removeLast();
blockOffsets.add(buffer.position());
- blockPositions.add(lastReadPosition);
+ blockPositions.removeLast();
+ blockPositions.add(lastBlockAddress << 16 | blockOffsetStart);
+ // Stream the buffer into the data stream.
+ incomingBuffer.position(blockOffsetStart);
+ incomingBuffer.limit(Math.min(incomingBuffer.limit(),blockOffsetEnd));
buffer.put(incomingBuffer);
+ // Then, add the last position read to the very end of the list, just past the end of the last buffer.
+ blockOffsets.add(buffer.position());
+ blockPositions.add(endOfRead);
+
// Set up the buffer for reading.
buffer.flip();
bufferFilled = true;
@@ -380,23 +429,21 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
lock.notify();
}
- if(validatingInputStream != null) {
- byte[] validBytes = new byte[length];
- try {
- validatingInputStream.read(validBytes,offset,length);
- for(int i = offset; i < offset+length; i++) {
- if(bytes[i] != validBytes[i]) {
- System.out.printf("Thread %s: preparing to throw an exception because contents don't match%n",Thread.currentThread().getId());
- throw new ReviewedStingException(String.format("Thread %s: blockInputStream %s attempting to return wrong set of bytes; mismatch at offset %d",Thread.currentThread().getId(),this,i));
- }
- }
- }
- catch(IOException ex) {
- throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
- }
- }
+// if(validatingInputStream != null) {
+// byte[] validBytes = new byte[length];
+// try {
+// validatingInputStream.read(validBytes,offset,length);
+// for(int i = offset; i < offset+length; i++) {
+// if(bytes[i] != validBytes[i])
+// throw new ReviewedStingException(String.format("Thread %s: blockInputStream %s attempting to return wrong set of bytes; mismatch at offset %d",Thread.currentThread().getId(),this,i));
+// }
+// }
+// catch(IOException ex) {
+// throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
+// }
+// }
- return length - remaining;
+ return eof() ? -1 : length-remaining;
}
public void close() {
@@ -424,7 +471,6 @@ public class BlockInputStream extends SeekableStream implements BAMInputStream {
lock.wait();
}
catch(InterruptedException ex) {
- // TODO: handle me.
throw new ReviewedStingException("Interrupt occurred waiting for buffer to fill",ex);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java
index f9f6539a7..0a6173c1e 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderPosition.java
@@ -75,6 +75,22 @@ class SAMReaderPosition {
return nextBlockAddress;
}
+ /**
+ * Retrieves the first offset of interest in the block returned by getBlockAddress().
+ * @return First offset of interest in this segment.
+ */
+ public int getFirstOffsetInBlock() {
+ return (nextBlockAddress == positionIterator.peek().getBlockStart()) ? positionIterator.peek().getBlockOffsetStart() : 0;
+ }
+
+ /**
+ * Retrieves the last offset of interest in the block returned by getBlockAddress().
+ * @return Last offset of interest in this segment.
+ */
+ public int getLastOffsetInBlock() {
+ return (nextBlockAddress == positionIterator.peek().getBlockEnd()) ? positionIterator.peek().getBlockOffsetEnd() : 65536;
+ }
+
public void reset() {
initialize();
}
@@ -95,26 +111,27 @@ class SAMReaderPosition {
* @param filePosition The current position within the file.
*/
void advancePosition(final long filePosition) {
- nextBlockAddress = filePosition;
+ nextBlockAddress = filePosition >> 16;
// Check the current file position against the iterator; if the iterator is before the current file position,
// draw the iterator forward. Remember when performing the check that coordinates are half-open!
- try {
- while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek())) {
- positionIterator.next();
- // Check to see if the iterator has more data available.
- if(positionIterator.hasNext() && filePosition < positionIterator.peek().getBlockStart()) {
- nextBlockAddress = positionIterator.peek().getBlockStart();
- break;
- }
+ while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek())) {
+ positionIterator.next();
+
+ // If the block iterator has shot past the file pointer, bring the file pointer flush with the start of the current block.
+ if(positionIterator.hasNext() && filePosition < positionIterator.peek().getChunkStart()) {
+ nextBlockAddress = positionIterator.peek().getBlockStart();
+ //System.out.printf("SAMReaderPosition: next block address advanced to %d%n",nextBlockAddress);
+ break;
}
}
- catch(Exception ex) {
- throw new ReviewedStingException("");
- }
+
+ // If we've shot off the end of the block pointer, notify consumers that iteration is complete.
+ if(!positionIterator.hasNext())
+ nextBlockAddress = -1;
}
private boolean isFilePositionPastEndOfChunk(final long filePosition, final GATKChunk chunk) {
- return (filePosition > chunk.getBlockEnd() || (filePosition == chunk.getBlockEnd() && chunk.getBlockOffsetEnd() == 0));
+ return filePosition >= chunk.getChunkEnd();
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
index 39e1bdc72..eec440820 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
@@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;
import java.util.Collection;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
index ff5e1064b..774b532f3 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
@@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
+import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.SampleUtils;
@@ -55,7 +56,6 @@ public class LinearMicroScheduler extends MicroScheduler {
traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) {
- LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
@@ -77,6 +77,12 @@ public class LinearMicroScheduler extends MicroScheduler {
done = walker.isDone();
}
+ // Special function call to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine
+ if( traversalEngine instanceof TraverseActiveRegions ) {
+ final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
+ accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
+ }
+
Object result = accumulator.finishTraversal();
printOnTraversalDone(result);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index d013db7e8..508099708 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -128,6 +128,8 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
traversalEngine = new TraverseDuplicates();
} else if (walker instanceof ReadPairWalker) {
traversalEngine = new TraverseReadPairs();
+ } else if (walker instanceof ActiveRegionWalker) {
+ traversalEngine = new TraverseActiveRegions();
} else {
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java
index d36a3b576..6acaadd50 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/TreeReducer.java
@@ -27,8 +27,8 @@ import java.util.concurrent.Future;
public class TreeReducer implements Callable {
private HierarchicalMicroScheduler microScheduler;
private TreeReducible walker;
- private final Future lhs;
- private final Future rhs;
+ private Future lhs;
+ private Future rhs;
/**
* Create a one-sided reduce. Result will be a simple pass-through of the result.
@@ -99,6 +99,10 @@ public class TreeReducer implements Callable {
long endTime = System.currentTimeMillis();
+ // Constituent bits of this tree reduces are no longer required. Throw them away.
+ this.lhs = null;
+ this.rhs = null;
+
microScheduler.reportTreeReduceTime( endTime - startTime );
return result;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
new file mode 100644
index 000000000..01bfe396a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -0,0 +1,213 @@
+package org.broadinstitute.sting.gatk.traversals;
+
+import net.sf.samtools.SAMRecord;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.WalkerManager;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.providers.*;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
+import org.broadinstitute.sting.gatk.walkers.DataSource;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.ArrayList;
+import java.util.LinkedHashSet;
+import java.util.LinkedList;
+import java.util.Queue;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 12/9/11
+ */
+
+public class TraverseActiveRegions extends TraversalEngine,LocusShardDataProvider> {
+ /**
+ * our log, which we want to capture anything from this class
+ */
+ protected static Logger logger = Logger.getLogger(TraversalEngine.class);
+
+ private final Queue workQueue = new LinkedList();
+ private final LinkedHashSet myReads = new LinkedHashSet();
+
+ @Override
+ protected String getTraversalType() {
+ return "active regions";
+ }
+
+ @Override
+ public T traverse( final ActiveRegionWalker walker,
+ final LocusShardDataProvider dataProvider,
+ T sum) {
+ logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
+
+ LocusView locusView = getLocusView( walker, dataProvider );
+
+ int minStart = Integer.MAX_VALUE;
+ final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
+
+ if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
+
+ final ArrayList isActiveList = new ArrayList();
+
+ //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
+ ReferenceOrderedView referenceOrderedDataView = null;
+ if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
+ referenceOrderedDataView = new ManagingReferenceOrderedView( dataProvider );
+ else
+ referenceOrderedDataView = (RodLocusView)locusView;
+
+ // We keep processing while the next reference location is within the interval
+ while( locusView.hasNext() ) {
+ final AlignmentContext locus = locusView.next();
+ GenomeLoc location = locus.getLocation();
+
+ dataProvider.getShard().getReadMetrics().incrementNumIterations();
+
+ if ( locus.hasExtendedEventPileup() ) {
+ // if the alignment context we received holds an "extended" pileup (i.e. pileup of insertions/deletions
+ // associated with the current site), we need to update the location. The updated location still starts
+ // at the current genomic position, but it has to span the length of the longest deletion (if any).
+ location = engine.getGenomeLocParser().setStop(location,location.getStop()+locus.getExtendedEventPileup().getMaxDeletionLength());
+
+ // it is possible that the new expanded location spans the current shard boundary; the next method ensures
+ // that when it is the case, the reference sequence held by the ReferenceView will be reloaded so that
+ // the view has all the bases we are gonna need. If the location fits within the current view bounds,
+ // the next call will not do anything to the view:
+ referenceView.expandBoundsToAccomodateLoc(location);
+ }
+
+ // create reference context. Note that if we have a pileup of "extended events", the context will
+ // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
+ final ReferenceContext refContext = referenceView.getReferenceContext(location);
+
+ // Iterate forward to get all reference ordered data covering this location
+ final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
+
+ // Call the walkers isActive function for this locus and add them to the list to be integrated later
+ final boolean isActive = walker.isActive( tracker, refContext, locus );
+ isActiveList.add( new ActiveRegion(location, isActive, engine.getGenomeLocParser()) );
+
+ // Grab all the previously unseen reads from this pileup and add them to the massive read list
+ for( final PileupElement p : locus.getBasePileup() ) {
+ final SAMRecord read = p.getRead();
+ if( !myReads.contains(read) ) {
+ myReads.add(read);
+ }
+ }
+
+ // If this is the last pileup for this shard then need to calculate the minimum alignment start so that
+ // we know which active regions in the work queue are now safe to process
+ if( !locusView.hasNext() ) {
+ for( final PileupElement p : locus.getBasePileup() ) {
+ final SAMRecord read = p.getRead();
+ if( read.getAlignmentStart() < minStart ) { minStart = read.getAlignmentStart(); }
+ }
+ }
+ printProgress(dataProvider.getShard(),locus.getLocation());
+ }
+
+ // Take the individual isActive calls and integrate them into contiguous active regions and
+ // add these blocks of work to the work queue
+ final ArrayList activeRegions = integrateActiveList( isActiveList );
+ logger.debug("Integrated " + isActiveList.size() + " isActive calls into " + activeRegions.size() + " regions." );
+ workQueue.addAll( activeRegions );
+ }
+
+ while( workQueue.peek().getLocation().getStop() < minStart ) {
+ final ActiveRegion activeRegion = workQueue.remove();
+ sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
+ }
+
+ return sum;
+ }
+
+ // Special function called in LinearMicroScheduler to empty out the work queue. Ugly for now but will be cleaned up when we push this functionality more into the engine
+ public T endTraversal( final Walker walker, T sum) {
+ while( workQueue.peek() != null ) {
+ final ActiveRegion activeRegion = workQueue.remove();
+ sum = processActiveRegion( activeRegion, myReads, workQueue, sum, (ActiveRegionWalker) walker );
+ }
+
+ return sum;
+ }
+
+ private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet reads, final Queue workQueue, final T sum, final ActiveRegionWalker walker ) {
+ final ArrayList placedReads = new ArrayList();
+ for( final SAMRecord read : reads ) {
+ final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
+ if( activeRegion.getLocation().overlapsP( readLoc ) ) {
+ // The region with the highest amount of overlap is chosen as the primary region for the read (ties are broken by choosing the right-most region)
+ long maxOverlap = activeRegion.getLocation().sizeOfOverlap( readLoc );
+ ActiveRegion bestRegion = activeRegion;
+ for( final ActiveRegion otherRegionToTest : workQueue ) {
+ if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
+ maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap(readLoc);
+ bestRegion = otherRegionToTest;
+ }
+ }
+ bestRegion.add( (GATKSAMRecord) read, true );
+
+ // The read is also added to all other regions it overlaps, but marked as non-primary
+ if( !bestRegion.equals(activeRegion) ) {
+ activeRegion.add( (GATKSAMRecord) read, false );
+ }
+ for( final ActiveRegion otherRegionToTest : workQueue ) {
+ if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
+ activeRegion.add( (GATKSAMRecord) read, false );
+ }
+ }
+ placedReads.add( read );
+ }
+ }
+ reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
+
+ logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLocation());
+ final M x = walker.map( activeRegion, null ); // BUGBUG: tracker needs to be filled in and passed to the walker
+ return walker.reduce( x, sum );
+ }
+
+ /**
+ * Gets the best view of loci for this walker given the available data.
+ * @param walker walker to interrogate.
+ * @param dataProvider Data with which to drive the locus view.
+ * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
+ */
+ private LocusView getLocusView( Walker walker, LocusShardDataProvider dataProvider ) {
+ DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
+ if( dataSource == DataSource.READS )
+ return new CoveredLocusView(dataProvider);
+ else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
+ return new AllLocusView(dataProvider);
+ else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
+ return new RodLocusView(dataProvider);
+ else
+ throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
+ }
+
+ // integrate active regions into contiguous chunks based on active status
+ private ArrayList integrateActiveList( final ArrayList activeList ) {
+ final ArrayList returnList = new ArrayList();
+ ActiveRegion prevLocus = activeList.remove(0);
+ ActiveRegion startLocus = prevLocus;
+ for( final ActiveRegion thisLocus : activeList ) {
+ if( prevLocus.isActive != thisLocus.isActive ) {
+ returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
+ prevLocus.isActive, engine.getGenomeLocParser() ) );
+ startLocus = thisLocus;
+ }
+ prevLocus = thisLocus;
+ }
+ // output the last region if necessary
+ if( startLocus != prevLocus ) {
+ returnList.add( new ActiveRegion( engine.getGenomeLocParser().createGenomeLoc(startLocus.getLocation().getContig(), startLocus.getLocation().getStart(), prevLocus.getLocation().getStart()),
+ prevLocus.isActive, engine.getGenomeLocParser() ) );
+ }
+ return returnList;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
new file mode 100644
index 000000000..d2891c959
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@@ -0,0 +1,29 @@
+package org.broadinstitute.sting.gatk.walkers;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+
+/**
+ * A walker that classifies loci as active/inactive via isActive() and then maps over contiguous ActiveRegions.
+ * User: rpoplin
+ * Date: 12/7/11
+ */
+
+@By(DataSource.READS)
+@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
+@PartitionBy(PartitionType.READ)
+public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
+ // Do we actually want to operate on the context?
+ public boolean filter(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
+ return true; // We are keeping all the reads
+ }
+
+ // Determine active status over the AlignmentContext
+ public abstract boolean isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
+
+ // Map over the ActiveRegion
+ public abstract MapType map(final ActiveRegion activeRegion, final ReadMetaDataTracker metaDataTracker);
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
index b30a25414..ace780dd0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
@@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@@ -72,25 +73,28 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
this.logger = logger;
}
- /**
- * Must be overridden by concrete subclasses
- *
- * @param tracker rod data
- * @param ref reference context
- * @param contexts stratified alignment contexts
- * @param contextType stratified context type
- * @param priors priors to use for GLs
- * @param alternateAlleleToUse the alternate allele to use, null if not set
- * @param useBAQedPileup should we use the BAQed pileup or the raw one?
- * @return variant context where genotypes are no-called but with GLs
- */
- public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
- ReferenceContext ref,
- Map contexts,
- AlignmentContextUtils.ReadOrientation contextType,
- GenotypePriors priors,
- Allele alternateAlleleToUse,
- boolean useBAQedPileup);
+ /**
+ * Can be overridden by concrete subclasses
+ *
+ * @param tracker rod data
+ * @param ref reference context
+ * @param contexts stratified alignment contexts
+ * @param contextType stratified context type
+ * @param priors priors to use for GLs
+ * @param alternateAlleleToUse the alternate allele to use, null if not set
+ * @param useBAQedPileup should we use the BAQed pileup or the raw one?
+ * @param locParser Genome Loc Parser
+ * @return variant context where genotypes are no-called but with GLs
+ */
+ public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker,
+ ReferenceContext ref,
+ Map<String, AlignmentContext> contexts,
+ AlignmentContextUtils.ReadOrientation contextType,
+ GenotypePriors priors,
+ Allele alternateAlleleToUse,
+ boolean useBAQedPileup,
+ GenomeLocParser locParser);
+
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
index fe2086d47..0756caf03 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@@ -54,17 +55,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private final boolean getAlleleListFromVCF;
private boolean DEBUG = false;
-
+ private final boolean doMultiAllelicCalls;
private boolean ignoreSNPAllelesWhenGenotypingIndels = false;
-
+ private final int maxAlternateAlleles;
private PairHMMIndelErrorModel pairModel;
private static ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>> indelLikelihoodMap =
new ThreadLocal<HashMap<PileupElement, LinkedHashMap<Allele, Double>>>() {
- protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
- return new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
- }
- };
+ protected synchronized HashMap<PileupElement, LinkedHashMap<Allele, Double>> initialValue() {
+ return new HashMap<PileupElement, LinkedHashMap<Allele, Double>>();
+ }
+ };
private LinkedHashMap<Allele, Haplotype> haplotypeMap;
@@ -81,12 +82,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
- UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.BANDED_INDEL_COMPUTATION);
+ UAC.OUTPUT_DEBUG_INDEL_INFO, !UAC.DONT_DO_BANDED_INDEL_COMPUTATION);
alleleList = new ArrayList<Allele>();
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
HAPLOTYPE_SIZE = UAC.INDEL_HAPLOTYPE_SIZE;
DEBUG = UAC.OUTPUT_DEBUG_INDEL_INFO;
+ maxAlternateAlleles = UAC.MAX_ALTERNATE_ALLELES;
+ doMultiAllelicCalls = UAC.MULTI_ALLELIC;
haplotypeMap = new LinkedHashMap<Allele, Haplotype>();
ignoreSNPAllelesWhenGenotypingIndels = UAC.IGNORE_SNP_ALLELES;
@@ -95,7 +98,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
private ArrayList<Allele> computeConsensusAlleles(ReferenceContext ref,
Map<String, AlignmentContext> contexts,
- AlignmentContextUtils.ReadOrientation contextType) {
+ AlignmentContextUtils.ReadOrientation contextType, GenomeLocParser locParser) {
Allele refAllele=null, altAllele=null;
GenomeLoc loc = ref.getLocus();
ArrayList<Allele> aList = new ArrayList<Allele>();
@@ -114,7 +117,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (insCount < minIndelCountForGenotyping && delCount < minIndelCountForGenotyping)
return aList;
-
+
for ( Map.Entry sample : contexts.entrySet() ) {
// todo -- warning, can be duplicating expensive partition here
AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType);
@@ -126,9 +129,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) {
//SAMRecord read = p.getRead();
- GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
+ GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null)
- continue;
+ continue;
if(ReadUtils.is454Read(read)) {
continue;
}
@@ -208,63 +211,69 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
}
-/* if (DEBUG) {
- int icount = indelPileup.getNumberOfInsertions();
- int dcount = indelPileup.getNumberOfDeletions();
- if (icount + dcount > 0)
- {
- List> eventStrings = indelPileup.getEventStringsWithCounts(ref.getBases());
- System.out.format("#ins: %d, #del:%d\n", insCount, delCount);
-
- for (int i=0 ; i < eventStrings.size() ; i++ ) {
- System.out.format("%s:%d,",eventStrings.get(i).first,eventStrings.get(i).second);
- // int k=0;
- }
- System.out.println();
- }
- } */
}
+ Collection<VariantContext> vcs = new ArrayList<VariantContext>();
int maxAlleleCnt = 0;
String bestAltAllele = "";
+
for (String s : consensusIndelStrings.keySet()) {
- int curCnt = consensusIndelStrings.get(s);
- if (curCnt > maxAlleleCnt) {
- maxAlleleCnt = curCnt;
- bestAltAllele = s;
+ int curCnt = consensusIndelStrings.get(s), stop = 0;
+ // if observed count is above the minimum threshold, we will genotype this allele
+ if (curCnt < minIndelCountForGenotyping)
+ continue;
+
+ if (s.startsWith("D")) {
+ // get deletion length
+ int dLen = Integer.valueOf(s.substring(1));
+ // get ref bases of accurate deletion
+ int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
+ stop = loc.getStart() + dLen;
+ byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
+
+ if (Allele.acceptableAlleleBases(refBases)) {
+ refAllele = Allele.create(refBases,true);
+ altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
+ }
+ }
+ else {
+ // insertion case
+ if (Allele.acceptableAlleleBases(s)) {
+ refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
+ altAllele = Allele.create(s, false);
+ stop = loc.getStart();
+ }
}
-// if (DEBUG)
-// System.out.format("Key:%s, number: %d\n",s,consensusIndelStrings.get(s) );
- } //gdebug-
- if (maxAlleleCnt < minIndelCountForGenotyping)
- return aList;
- if (bestAltAllele.startsWith("D")) {
- // get deletion length
- int dLen = Integer.valueOf(bestAltAllele.substring(1));
- // get ref bases of accurate deletion
- int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart();
+ ArrayList<Allele> vcAlleles = new ArrayList<Allele>();
+ vcAlleles.add(refAllele);
+ vcAlleles.add(altAllele);
- //System.out.println(new String(ref.getBases()));
- byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen);
+ final VariantContextBuilder builder = new VariantContextBuilder().source("");
+ builder.loc(loc.getContig(), loc.getStart(), stop);
+ builder.alleles(vcAlleles);
+ builder.referenceBaseForIndel(ref.getBase());
+ builder.noGenotypes();
+ if (doMultiAllelicCalls)
+ vcs.add(builder.make());
+ else {
+ if (curCnt > maxAlleleCnt) {
+ maxAlleleCnt = curCnt;
+ vcs.clear();
+ vcs.add(builder.make());
+ }
- if (Allele.acceptableAlleleBases(refBases)) {
- refAllele = Allele.create(refBases,true);
- altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
}
- else {
- // insertion case
- if (Allele.acceptableAlleleBases(bestAltAllele)) {
- refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
- altAllele = Allele.create(bestAltAllele, false);
- }
- }
- if (refAllele != null && altAllele != null) {
- aList.add(0,refAllele);
- aList.add(1,altAllele);
- }
+
+ if (vcs.isEmpty())
+ return aList; // nothing else to do, no alleles passed minimum count criterion
+
+ VariantContext mergedVC = VariantContextUtils.simpleMerge(locParser, vcs, null, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.UNSORTED, false, false, null, false, false);
+
+ aList = new ArrayList<Allele>(mergedVC.getAlleles());
+
return aList;
}
@@ -277,7 +286,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Allele alternateAlleleToUse,
- boolean useBAQedPileup) {
+ boolean useBAQedPileup, GenomeLocParser locParser) {
if ( tracker == null )
return null;
@@ -294,17 +303,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
haplotypeMap.clear();
if (getAlleleListFromVCF) {
- for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
- if( vc_input != null &&
- allowableTypes.contains(vc_input.getType()) &&
- ref.getLocus().getStart() == vc_input.getStart()) {
- vc = vc_input;
- break;
- }
- }
- // ignore places where we don't have a variant
- if ( vc == null )
- return null;
+ for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
+ if( vc_input != null &&
+ allowableTypes.contains(vc_input.getType()) &&
+ ref.getLocus().getStart() == vc_input.getStart()) {
+ vc = vc_input;
+ break;
+ }
+ }
+ // ignore places where we don't have a variant
+ if ( vc == null )
+ return null;
alleleList.clear();
if (ignoreSNPAllelesWhenGenotypingIndels) {
@@ -323,7 +332,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
else {
- alleleList = computeConsensusAlleles(ref,contexts, contextType);
+ alleleList = computeConsensusAlleles(ref,contexts, contextType, locParser);
if (alleleList.isEmpty())
return null;
}
@@ -340,7 +349,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (alleleList.isEmpty())
return null;
-
+
refAllele = alleleList.get(0);
altAllele = alleleList.get(1);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
index eee89674a..81c766e4d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@@ -30,10 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.utils.BaseUtils;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException;
@@ -66,7 +63,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
final AlignmentContextUtils.ReadOrientation contextType,
final GenotypePriors priors,
final Allele alternateAlleleToUse,
- final boolean useBAQedPileup) {
+ final boolean useBAQedPileup,
+ final GenomeLocParser locParser) {
if ( !(priors instanceof DiploidSNPGenotypePriors) )
throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model");
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
index 5713432b4..16159393f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@@ -109,7 +109,7 @@ public class UnifiedArgumentCollection {
* For advanced users only.
*/
@Advanced
- @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles (SNPs only)", required = false)
+ @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow the discovery of multiple alleles", required = false)
public boolean MULTI_ALLELIC = false;
/**
@@ -146,8 +146,8 @@ public class UnifiedArgumentCollection {
public int INDEL_HAPLOTYPE_SIZE = 80;
@Hidden
- @Argument(fullName = "bandedIndel", shortName = "bandedIndel", doc = "Banded Indel likelihood computation", required = false)
- public boolean BANDED_INDEL_COMPUTATION = false;
+ @Argument(fullName = "noBandedIndel", shortName = "noBandedIndel", doc = "Don't do Banded Indel likelihood computation", required = false)
+ public boolean DONT_DO_BANDED_INDEL_COMPUTATION = false;
@Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
@@ -184,7 +184,7 @@ public class UnifiedArgumentCollection {
// todo- arguments to remove
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
- uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION;
+ uac.DONT_DO_BANDED_INDEL_COMPUTATION = DONT_DO_BANDED_INDEL_COMPUTATION;
uac.MULTI_ALLELIC = MULTI_ALLELIC;
return uac;
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 34be88dbb..5d73e8d28 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -243,7 +243,7 @@ public class UnifiedGenotyperEngine {
glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
}
- return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine);
+ return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser);
}
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java
index a271d3c35..3c7c6f00c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java
@@ -46,6 +46,7 @@ import java.util.*;
public class VariantSummary extends VariantEvaluator implements StandardEval {
final protected static Logger logger = Logger.getLogger(VariantSummary.class);
+ /** Indels with size greater than this value are tallied in the CNV column */
private final static int MAX_INDEL_LENGTH = 50;
private final static double MIN_CNV_OVERLAP = 0.5;
private VariantEvalWalker walker;
@@ -196,14 +197,16 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
}
private final boolean overlapsKnownCNV(VariantContext cnv) {
- final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
- IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
+ if ( knownCNVs != null ) {
+ final GenomeLoc loc = walker.getGenomeLocParser().createGenomeLoc(cnv, true);
+ IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
- final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
- while ( nodeIt.hasNext() ) {
- final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
- if ( overlapP > MIN_CNV_OVERLAP )
- return true;
+ final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
+ while ( nodeIt.hasNext() ) {
+ final double overlapP = loc.reciprocialOverlapFraction(nodeIt.next().getValue());
+ if ( overlapP > MIN_CNV_OVERLAP )
+ return true;
+ }
}
return false;
@@ -224,7 +227,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
allVariantCounts.inc(type, ALL);
// type specific calculations
- if ( type == Type.SNP ) {
+ if ( type == Type.SNP && eval.isBiallelic() ) {
titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
titvTable.inc(type, ALL);
}
diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java
index cdfc329e8..71640c66a 100644
--- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java
+++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -70,17 +70,18 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
" * Short name of %1$s%n" +
" * @return Short name of %1$s%n" +
" */%n" +
- "def %3$s = this.%1$s%n" +
+ "%5$sdef %3$s = this.%1$s%n" +
"%n" +
"/**%n" +
" * Short name of %1$s%n" +
" * @param value Short name of %1$s%n" +
" */%n" +
- "def %4$s(value: %2$s) { this.%1$s = value }%n",
+ "%5$sdef %4$s(value: %2$s) { this.%1$s = value }%n",
getFieldName(),
getFieldType(),
getShortFieldGetter(),
- getShortFieldSetter());
+ getShortFieldSetter(),
+ getPrivacy());
}
protected static final String REQUIRED_TEMPLATE = " + required(\"%1$s\", %3$s, spaceSeparated=true, escape=true, format=%2$s)";
@@ -135,11 +136,8 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
new IntervalFileArgumentField(argumentDefinition),
new IntervalStringArgumentField(argumentDefinition));
- // ROD Bindings are set by the RodBindField
- } else if (RodBindArgumentField.ROD_BIND_FIELD.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
- // TODO: Once everyone is using @Allows and @Requires correctly, we can stop blindly allowing Triplets
- return Arrays.asList(new RodBindArgumentField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, Tribble.STANDARD_INDEX_EXTENSION));
- //return Collections.emptyList();
+ } else if (NumThreadsArgumentField.NUM_THREADS_FIELD.equals(argumentDefinition.fullName)) {
+ return Arrays.asList(new NumThreadsArgumentField(argumentDefinition));
} else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) {
return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam"));
@@ -166,10 +164,13 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
fields.add(new OutputArgumentField(argumentDefinition, gatherClass));
- if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType))
+ if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)) {
fields.add(new SAMFileWriterIndexArgumentField(argumentDefinition));
- else if (VCFWriter.class.isAssignableFrom(argumentDefinition.argumentType))
+ fields.add(new SAMFileWriterMD5ArgumentField(argumentDefinition));
+ }
+ else if (VCFWriter.class.isAssignableFrom(argumentDefinition.argumentType)) {
fields.add(new VCFWriterIndexArgumentField(argumentDefinition));
+ }
return fields;
@@ -228,7 +229,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
@Override protected String getRawFieldName() { return super.getRawFieldName() + "String"; }
@Override protected String getFullName() { return super.getFullName() + "String"; }
@Override protected String getRawShortFieldName() { return super.getRawShortFieldName() + "String"; }
- @Override protected String getFieldType() { return "List[String]"; }
+ @Override protected String getFieldType() { return "Seq[String]"; }
@Override protected String getDefaultValue() { return "Nil"; }
@Override public String getCommandLineTemplate() { return REPEAT_TEMPLATE; }
@@ -250,7 +251,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
@Override protected Class<?> getInnerType() { return File.class; }
- @Override protected String getFieldType() { return isMultiValued() ? "List[File]" : "File"; }
+ @Override protected String getFieldType() { return isMultiValued() ? "Seq[File]" : "File"; }
@Override protected String getDefaultValue() { return isMultiValued() ? "Nil" : "_"; }
}
@@ -294,7 +295,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
@Override protected Class<?> getInnerType() { return mapType(argumentDefinition.componentType); }
- @Override protected String getFieldType() { return String.format("List[%s]", getType(getInnerType())); }
+ @Override protected String getFieldType() { return String.format("Seq[%s]", getType(getInnerType())); }
@Override protected String getDefaultValue() { return "Nil"; }
@Override protected String getCommandLineTemplate() { return REPEAT_TEMPLATE; }
}
@@ -336,17 +337,16 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
// Allows the user to specify the track name, track type, and the file.
- public static class RodBindArgumentField extends ArgumentDefinitionField {
- public static final String ROD_BIND_FIELD = "rodBind";
+ public static class NumThreadsArgumentField extends OptionedArgumentField {
+ public static final String NUM_THREADS_FIELD = "num_threads";
- public RodBindArgumentField(ArgumentDefinition argumentDefinition) {
- super(argumentDefinition);
+ public NumThreadsArgumentField(ArgumentDefinition argumentDefinition) {
+ super(argumentDefinition, false);
}
- @Override protected Class<?> getInnerType() { return null; } // RodBind does not need to be imported.
- @Override protected String getFieldType() { return "List[RodBind]"; }
- @Override protected String getDefaultValue() { return "Nil"; }
- @Override protected String getCommandLineTemplate() {
- return " + repeat(\"%1$s\", %3$s, formatPrefix=RodBind.formatCommandLineParameter, spaceSeparated=true, escape=true, format=%2$s)";
+
+ @Override
+ protected String getFreezeFields() {
+ return String.format("if (num_threads.isDefined) nCoresRequest = num_threads%n");
}
}
@@ -356,7 +356,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
super(argumentDefinition);
}
@Override protected Class<?> getInnerType() { return null; } // TaggedFile does not need to be imported.
- @Override protected String getFieldType() { return argumentDefinition.isMultiValued ? "List[File]" : "File"; }
+ @Override protected String getFieldType() { return argumentDefinition.isMultiValued ? "Seq[File]" : "File"; }
@Override protected String getDefaultValue() { return argumentDefinition.isMultiValued ? "Nil" : "_"; }
@Override protected String getCommandLineTemplate() {
if (argumentDefinition.isMultiValued) {
@@ -395,10 +395,11 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
@Override protected String getFullName() { return this.indexFieldName; }
@Override protected boolean isRequired() { return false; }
- @Override protected String getFieldType() { return "List[File]"; }
+ @Override protected String getFieldType() { return "Seq[File]"; }
@Override protected String getDefaultValue() { return "Nil"; }
@Override protected Class<?> getInnerType() { return File.class; }
@Override protected String getRawFieldName() { return this.indexFieldName; }
+ @Override protected String getPrivacy() { return "private "; }
@Override protected String getFreezeFields() {
if (originalIsMultiValued) {
if (originalSuffix == null) {
@@ -434,53 +435,69 @@ public abstract class ArgumentDefinitionField extends ArgumentField {
}
}
- // Tracks an automatically generated index
- private static abstract class OutputIndexArgumentField extends ArgumentField {
- protected final String indexFieldName;
+ // Tracks an automatically generated index, md5, etc.
+ private static abstract class AuxilliaryOutputArgumentField extends ArgumentField {
protected final String originalFieldName;
- public OutputIndexArgumentField(ArgumentDefinition originalArgumentDefinition) {
- this.indexFieldName = originalArgumentDefinition.fullName + "Index";
+ protected final String auxFieldName;
+ protected final String auxFieldLabel;
+ public AuxilliaryOutputArgumentField(ArgumentDefinition originalArgumentDefinition, String auxFieldLabel) {
this.originalFieldName = originalArgumentDefinition.fullName;
+ this.auxFieldName = originalArgumentDefinition.fullName + auxFieldLabel;
+ this.auxFieldLabel = auxFieldLabel;
}
@Override protected Class<? extends Annotation> getAnnotationIOClass() { return Output.class; }
@Override public String getCommandLineAddition() { return ""; }
- @Override protected String getDoc() { return "Automatically generated index for " + this.originalFieldName; }
- @Override protected String getFullName() { return this.indexFieldName; }
+ @Override protected String getDoc() { return String.format("Automatically generated %s for %s", auxFieldLabel.toLowerCase(), this.originalFieldName); }
+ @Override protected String getFullName() { return this.auxFieldName; }
@Override protected boolean isRequired() { return false; }
@Override protected String getFieldType() { return "File"; }
@Override protected String getDefaultValue() { return "_"; }
@Override protected Class<?> getInnerType() { return File.class; }
- @Override protected String getRawFieldName() { return this.indexFieldName; }
+ @Override protected String getRawFieldName() { return this.auxFieldName; }
+ @Override protected String getPrivacy() { return "private "; }
@Override public boolean isGather() { return true; }
@Override protected String getGatherAnnotation() {
- return String.format("@Gather(classOf[AutoIndexGatherFunction])%n");
+ return String.format("@Gather(enabled=false)%n");
}
}
- private static class VCFWriterIndexArgumentField extends OutputIndexArgumentField {
+ private static class VCFWriterIndexArgumentField extends AuxilliaryOutputArgumentField {
public VCFWriterIndexArgumentField(ArgumentDefinition originalArgumentDefinition) {
- super(originalArgumentDefinition);
+ super(originalArgumentDefinition, "Index");
}
@Override protected String getFreezeFields() {
return String.format(
("if (%2$s != null)%n" +
" if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(%2$s.getPath))%n" +
" %1$s = new File(%2$s.getPath + \"%3$s\")%n"),
- indexFieldName, originalFieldName, Tribble.STANDARD_INDEX_EXTENSION);
+ auxFieldName, originalFieldName, Tribble.STANDARD_INDEX_EXTENSION);
}
}
- private static class SAMFileWriterIndexArgumentField extends OutputIndexArgumentField {
+ private static class SAMFileWriterIndexArgumentField extends AuxilliaryOutputArgumentField {
public SAMFileWriterIndexArgumentField(ArgumentDefinition originalArgumentDefinition) {
- super(originalArgumentDefinition);
+ super(originalArgumentDefinition, "Index");
}
@Override protected String getFreezeFields() {
return String.format(
("if (%2$s != null)%n" +
" if (!%3$s)%n" +
" %1$s = new File(%2$s.getPath.stripSuffix(\".bam\") + \"%4$s\")%n"),
- indexFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix);
+ auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix);
+ }
+ }
+
+ private static class SAMFileWriterMD5ArgumentField extends AuxilliaryOutputArgumentField {
+ public SAMFileWriterMD5ArgumentField(ArgumentDefinition originalArgumentDefinition) {
+ super(originalArgumentDefinition, "MD5");
+ }
+ @Override protected String getFreezeFields() {
+ return String.format(
+ ("if (%2$s != null)%n" +
+ " if (%3$s)%n" +
+ " %1$s = new File(%2$s.getPath + \"%4$s\")%n"),
+ auxFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.ENABLE_MD5_FULLNAME, ".md5");
}
}
diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java
index e90933504..2428a13a8 100644
--- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java
+++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentField.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -56,7 +56,7 @@ public abstract class ArgumentField {
return String.format("%n" +
"/** %s */%n" +
"@%s(fullName=\"%s\", shortName=\"%s\", doc=\"%s\", required=%s, exclusiveOf=\"%s\", validation=\"%s\")%n" +
- "%svar %s: %s = %s%n" +
+ "%s%svar %s: %s = %s%n" +
"%s",
getDoc(),
getAnnotationIOClass().getSimpleName(),
@@ -66,7 +66,7 @@ public abstract class ArgumentField {
isRequired(),
getExclusiveOf(),
getValidation(),
- getGatherAnnotation(), getFieldName(), getFieldType(), getDefaultValue(),
+ getGatherAnnotation(), getPrivacy(), getFieldName(), getFieldType(), getDefaultValue(),
getDefineAddition());
}
@@ -143,6 +143,9 @@ public abstract class ArgumentField {
/** @return True if this field uses @Gather. */
public boolean isGather() { return false; }
+ /** @return Privacy for the field. */
+ protected String getPrivacy() { return ""; }
+
/** @return The raw field name, which will be checked against scala build in types. */
protected abstract String getRawFieldName();
/** @return The field name checked against reserved words. */
diff --git a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
index 9c40fb976..a3f80af1c 100644
--- a/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
+++ b/public/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -34,13 +34,11 @@ import org.broadinstitute.sting.commandline.ParsingEngine;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.WalkerManager;
-import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
-import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.Walker;
@@ -85,7 +83,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
"%n" +
"/** A dynamicly generated list of classes that the GATK Extensions depend on, but are not be detected by default by BCEL. */%n" +
"class %s {%n" +
- "val types = List(%n%s)%n" +
+ "val types = Seq(%n%s)%n" +
"}%n";
@Output(fullName="output_directory", shortName="outDir", doc="Directory to output the generated scala", required=true)
@@ -95,10 +93,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
GenomeAnalysisEngine GATKEngine = new GenomeAnalysisEngine();
WalkerManager walkerManager = new WalkerManager();
FilterManager filterManager = new FilterManager();
- // HACK: We're currently relying on the fact that RMDTrackBuilder is used only from RMD type lookups, not
- // RMD track location. Therefore, no sequence dictionary is required. In the future, we should separate
- // RMD track lookups from track creation.
- RMDTrackBuilder trackBuilder = new RMDTrackBuilder(null,null,ValidationExclusion.TYPE.ALL);
/**
* Required main method implementation.
@@ -147,7 +141,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
String clpConstructor = String.format("analysisName = \"%s\"%njavaMainClass = \"%s\"%n", clpClassName, clp.getName());
writeClass("org.broadinstitute.sting.queue.function.JavaCommandLineFunction", clpClassName,
- false, clpConstructor, ArgumentDefinitionField.getArgumentFields(parser,clp), dependents, false);
+ false, clpConstructor, ArgumentDefinitionField.getArgumentFields(parser,clp), dependents);
if (clp == CommandLineGATK.class) {
for (Entry>> walkersByPackage: walkerManager.getWalkerNamesByPackage(false).entrySet()) {
@@ -169,7 +163,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
}
writeClass(GATK_EXTENSIONS_PACKAGE_NAME + "." + clpClassName, walkerName,
- isScatter, constructor, argumentFields, dependents, true);
+ isScatter, constructor, argumentFields, dependents);
} catch (Exception e) {
throw new ReviewedStingException("Error generating wrappers for walker " + walkerType, e);
}
@@ -242,8 +236,8 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
*/
private void writeClass(String baseClass, String className, boolean isScatter,
String constructor, List extends ArgumentField> argumentFields,
- Set> dependents, boolean isGATKWalker) throws IOException {
- String content = getContent(CLASS_TEMPLATE, baseClass, className, constructor, isScatter, "", argumentFields, dependents, isGATKWalker);
+ Set> dependents) throws IOException {
+ String content = getContent(CLASS_TEMPLATE, baseClass, className, constructor, isScatter, "", argumentFields, dependents);
writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
}
@@ -257,7 +251,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
*/
private void writeFilter(String className, List extends ArgumentField> argumentFields, Set> dependents) throws IOException {
String content = getContent(TRAIT_TEMPLATE, "org.broadinstitute.sting.queue.function.CommandLineFunction",
- className, "", false, String.format(" + \" -read_filter %s\"", className), argumentFields, dependents, false);
+ className, "", false, String.format(" + \" -read_filter %s\"", className), argumentFields, dependents);
writeFile(GATK_EXTENSIONS_PACKAGE_NAME + "." + className, content);
}
@@ -351,8 +345,7 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
*/
private static String getContent(String scalaTemplate, String baseClass, String className,
String constructor, boolean isScatter, String commandLinePrefix,
- List extends ArgumentField> argumentFields, Set> dependents,
- boolean isGATKWalker) {
+ List extends ArgumentField> argumentFields, Set> dependents) {
StringBuilder arguments = new StringBuilder();
StringBuilder commandLine = new StringBuilder(commandLinePrefix);
@@ -376,9 +369,6 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
if (isGather)
importSet.add("import org.broadinstitute.sting.commandline.Gather");
- // Needed for ShellUtils.escapeShellArgument()
- importSet.add("import org.broadinstitute.sting.queue.util.ShellUtils");
-
// Sort the imports so that the are always in the same order.
List sortedImports = new ArrayList(importSet);
Collections.sort(sortedImports);
@@ -386,10 +376,8 @@ public class GATKExtensionsGenerator extends CommandLineProgram {
StringBuffer freezeFieldOverride = new StringBuffer();
for (String freezeField: freezeFields)
freezeFieldOverride.append(freezeField);
- if (freezeFieldOverride.length() > 0 || isGATKWalker) {
- freezeFieldOverride.insert(0, String.format("override def freezeFieldValues = {%nsuper.freezeFieldValues%n"));
- if ( isGATKWalker )
- freezeFieldOverride.append(String.format("if ( num_threads.isDefined ) nCoresRequest = num_threads%n"));
+ if (freezeFieldOverride.length() > 0) {
+ freezeFieldOverride.insert(0, String.format("override def freezeFieldValues() {%nsuper.freezeFieldValues()%n"));
freezeFieldOverride.append(String.format("}%n%n"));
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
index 345161416..6941b888b 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
@@ -145,7 +145,7 @@ public class GenomeLoc implements Comparable, Serializable, HasGenome
}
return new GenomeLoc(getContig(), this.contigIndex,
- Math.min(getStart(), that.getStart()),
+ Math.min( getStart(), that.getStart() ),
Math.max( getStop(), that.getStop()) );
}
@@ -465,4 +465,8 @@ public class GenomeLoc implements Comparable, Serializable, HasGenome
private final static double overlapPercent(final GenomeLoc gl1, final GenomeLoc gl2) {
return (1.0 * gl1.intersect(gl2).size()) / gl1.size();
}
+
+ public long sizeOfOverlap( final GenomeLoc that ) {
+ return ( this.overlapsP(that) ? Math.min( getStop(), that.getStop() ) - Math.max( getStart(), that.getStart() ) : 0L );
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRead.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRead.java
new file mode 100644
index 000000000..8d08a29b6
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRead.java
@@ -0,0 +1,19 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 1/4/12
+ */
+
+public class ActiveRead {
+ final public GATKSAMRecord read;
+ final public boolean isPrimaryRegion;
+
+ ActiveRead( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
+ this.read = read;
+ this.isPrimaryRegion = isPrimaryRegion;
+ }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
new file mode 100644
index 000000000..e8908480c
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
@@ -0,0 +1,55 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+import net.sf.picard.reference.IndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.HasGenomeLocation;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+
+import java.util.ArrayList;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 1/4/12
+ */
+
+public class ActiveRegion implements HasGenomeLocation {
+
+ private final ArrayList reads = new ArrayList();
+ private byte[] reference = null;
+ private final GenomeLoc loc;
+ private GenomeLoc referenceLoc = null;
+ private final GenomeLocParser genomeLocParser;
+ public final boolean isActive;
+
+ public ActiveRegion( final GenomeLoc loc, final boolean isActive, final GenomeLocParser genomeLocParser ) {
+ this.loc = loc;
+ this.isActive = isActive;
+ this.genomeLocParser = genomeLocParser;
+ referenceLoc = loc;
+ }
+
+ // add each read to the bin and extend the reference genome loc if needed
+ public void add( final GATKSAMRecord read, final boolean isPrimaryRegion ) {
+ referenceLoc = referenceLoc.union( genomeLocParser.createGenomeLoc( read ) );
+ reads.add( new ActiveRead(read, isPrimaryRegion) );
+ }
+
+ public ArrayList getReads() { return reads; }
+
+ public byte[] getReference( final IndexedFastaSequenceFile referenceReader ) {
+ // set up the reference if we haven't done so yet
+ if ( reference == null ) {
+ reference = referenceReader.getSubsequenceAt(referenceLoc.getContig(), referenceLoc.getStart(), referenceLoc.getStop()).getBases();
+ }
+
+ return reference;
+ }
+
+ public GenomeLoc getLocation() { return loc; }
+
+ public GenomeLoc getReferenceLocation() { return referenceLoc; }
+
+ public int size() { return reads.size(); }
+}
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 646ede836..d89c9ef6f 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -265,7 +265,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
- Arrays.asList("fa4f3ee67d98b64102a8a3ec81a3bc81"));
+ Arrays.asList("c60a44ba94a80a0cb1fba8b6f90a13cd"));
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
}
@@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
- Arrays.asList("df90890e43d735573a3b3e4f289ca46b"));
+ Arrays.asList("36ce53ae4319718ad9c8ae391deebc8c"));
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
}
@@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
- Arrays.asList("cff6dd0f4eb1ef0b6fc476da6ffead19"));
+ Arrays.asList("d356cbaf240d7025d1aecdabaff3a3e0"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
index b3555b145..5c3a43c96 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@@ -450,4 +450,21 @@ public class VariantEvalIntegrationTest extends WalkerTest {
);
executeTest("testIntervalStrat", spec);
}
+
+ @Test
+ public void testModernVCFWithLargeIndels() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ buildCommandLine(
+ "-T VariantEval",
+ "-R " + b37KGReference,
+ "-eval " + validationDataLocation + "/NA12878.HiSeq.WGS.b37_decoy.indel.recalibrated.vcf",
+ "-L 20",
+ "-D " + b37dbSNP132,
+ "-o %s"
+ ),
+ 1,
+ Arrays.asList("a6f8b32fa732632da13dfe3ddcc73cef")
+ );
+ executeTest("testModernVCFWithLargeIndels", spec);
+ }
}
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
index 621afe817..e26541e98 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
@@ -29,14 +29,14 @@ class DataProcessingPipeline extends QScript {
var reference: File = _
@Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
- var dbSNP: List[File] = List()
+ var dbSNP: Seq[File] = Seq()
/****************************************************************************
* Optional Parameters
****************************************************************************/
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
- var indels: List[File] = List()
+ var indels: Seq[File] = Seq()
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
var bwaPath: File = _
@@ -118,13 +118,13 @@ class DataProcessingPipeline extends QScript {
// Because the realignment only happens after these scripts are executed, in case you are using
// bwa realignment, this function will operate over the original bam files and output over the
// (to be realigned) bam files.
- def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, List[File]] = {
+ def createSampleFiles(bamFiles: Seq[File], realignedBamFiles: Seq[File]): Map[String, Seq[File]] = {
// Creating a table with SAMPLE information from each input BAM file
- val sampleTable = scala.collection.mutable.Map.empty[String, List[File]]
+ val sampleTable = scala.collection.mutable.Map.empty[String, Seq[File]]
val realignedIterator = realignedBamFiles.iterator
for (bam <- bamFiles) {
- val rBam = realignedIterator.next // advance to next element in the realignedBam list so they're in sync.
+ val rBam = realignedIterator.next() // advance to next element in the realignedBam list so they're in sync.
val samReader = new SAMFileReader(bam)
val header = samReader.getFileHeader
@@ -138,12 +138,12 @@ class DataProcessingPipeline extends QScript {
for (rg <- readGroups) {
val sample = rg.getSample
if (!sampleTable.contains(sample))
- sampleTable(sample) = List(rBam)
+ sampleTable(sample) = Seq(rBam)
else if ( !sampleTable(sample).contains(rBam))
sampleTable(sample) :+= rBam
}
}
- return sampleTable.toMap
+ sampleTable.toMap
}
// Rebuilds the Read Group string to give BWA
@@ -161,8 +161,8 @@ class DataProcessingPipeline extends QScript {
// Takes a list of processed BAM files and realign them using the BWA option requested (bwase or bwape).
// Returns a list of realigned BAM files.
- def performAlignment(bams: List[File]): List[File] = {
- var realignedBams: List[File] = List()
+ def performAlignment(bams: Seq[File]): Seq[File] = {
+ var realignedBams: Seq[File] = Seq()
var index = 1
for (bam <- bams) {
// first revert the BAM file to the original qualities
@@ -194,10 +194,10 @@ class DataProcessingPipeline extends QScript {
realignedBams :+= rgRealignedBamFile
index = index + 1
}
- return realignedBams
+ realignedBams
}
- def getIndelCleaningModel(): ConsensusDeterminationModel = {
+ def getIndelCleaningModel: ConsensusDeterminationModel = {
if (cleaningModel == "KNOWNS_ONLY")
ConsensusDeterminationModel.KNOWNS_ONLY
else if (cleaningModel == "USE_SW")
@@ -206,17 +206,17 @@ class DataProcessingPipeline extends QScript {
ConsensusDeterminationModel.USE_READS
}
- def revertBams(bams: List[File], removeAlignmentInformation: Boolean): List[File] = {
- var revertedBAMList: List[File] = List()
+ def revertBams(bams: Seq[File], removeAlignmentInformation: Boolean): Seq[File] = {
+ var revertedBAMList: Seq[File] = Seq()
for (bam <- bams)
revertedBAMList :+= revertBAM(bam, removeAlignmentInformation)
- return revertedBAMList
+ revertedBAMList
}
def revertBAM(bam: File, removeAlignmentInformation: Boolean): File = {
val revertedBAM = swapExt(bam, ".bam", ".reverted.bam")
add(revert(bam, revertedBAM, removeAlignmentInformation))
- return revertedBAM
+ revertedBAM
}
/****************************************************************************
@@ -224,22 +224,22 @@ class DataProcessingPipeline extends QScript {
****************************************************************************/
- def script = {
+ def script() {
// final output list of processed bam files
- var cohortList: List[File] = List()
+ var cohortList: Seq[File] = Seq()
// sets the model for the Indel Realigner
- cleanModelEnum = getIndelCleaningModel()
+ cleanModelEnum = getIndelCleaningModel
// keep a record of the number of contigs in the first bam file in the list
- val bams = QScriptUtils.createListFromFile(input)
+ val bams = QScriptUtils.createSeqFromFile(input)
if (nContigs < 0)
nContigs = QScriptUtils.getNumberOfContigs(bams(0))
val realignedBAMs = if (useBWApe || useBWAse || useBWAsw) {performAlignment(bams)} else {revertBams(bams, false)}
// generate a BAM file per sample joining all per lane files if necessary
- val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs)
+ val sampleBAMFiles: Map[String, Seq[File]] = createSampleFiles(bams, realignedBAMs)
// if this is a 'knowns only' indel realignment run, do it only once for all samples.
val globalIntervals = new File(outputDir + projectName + ".intervals")
@@ -317,7 +317,7 @@ class DataProcessingPipeline extends QScript {
this.maxRecordsInRam = 100000
}
- case class target (inBams: List[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
+ case class target (inBams: Seq[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY)
this.input_file = inBams
this.out = outIntervals
@@ -330,7 +330,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outIntervals + ".target"
}
- case class clean (inBams: List[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
+ case class clean (inBams: Seq[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
this.input_file = inBams
this.targetIntervals = tIntervals
this.out = outBam
@@ -347,11 +347,11 @@ class DataProcessingPipeline extends QScript {
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
this.knownSites ++= qscript.dbSNP
- this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
+ this.covariate ++= Seq("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
this.input_file :+= inBam
this.recal_file = outRecalFile
if (!defaultPlatform.isEmpty) this.default_platform = defaultPlatform
- if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
+ if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
this.scatterCount = nContigs
this.analysisName = queueLogDir + outRecalFile + ".covariates"
@@ -363,7 +363,7 @@ class DataProcessingPipeline extends QScript {
this.recal_file = inRecalFile
this.baq = CalculationMode.CALCULATE_AS_NECESSARY
this.out = outBam
- if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
+ if (!qscript.intervalString.isEmpty) this.intervalsString ++= Seq(qscript.intervalString)
else if (qscript.intervals != null) this.intervals :+= qscript.intervals
this.no_pg_tag = qscript.testMode
this.scatterCount = nContigs
@@ -395,7 +395,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".dedup"
}
- case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
+ case class joinBams (inBams: Seq[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
this.input = inBams
this.output = outBam
this.analysisName = queueLogDir + outBam + ".joinBams"
@@ -495,7 +495,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".bwasw"
}
- case class writeList(inBams: List[File], outBamList: File) extends ListWriterFunction {
+ case class writeList(inBams: Seq[File], outBamList: File) extends ListWriterFunction {
this.inputFiles = inBams
this.listFile = outBamList
this.analysisName = queueLogDir + outBamList + ".bamList"
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala
index c06601a2d..b50bf3d67 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/MethodsDevelopmentCallingPipeline.scala
@@ -46,20 +46,24 @@ class MethodsDevelopmentCallingPipeline extends QScript {
val bamList: File,
val goldStandard_VCF: File,
val intervals: String,
- val titvTarget: Double,
- val trancheTarget: Double,
+ val indelTranchTarget: Double,
+ val snpTrancheTarget: Double,
val isLowpass: Boolean,
val isExome: Boolean,
val nSamples: Int) {
val name = qscript.outputDir + baseName
val clusterFile = new File(name + ".clusters")
- val rawVCF = new File(name + ".raw.vcf")
+ val rawSnpVCF = new File(name + ".raw.vcf")
val rawIndelVCF = new File(name + ".raw.indel.vcf")
val filteredIndelVCF = new File(name + ".filtered.indel.vcf")
- val recalibratedVCF = new File(name + ".recalibrated.vcf")
- val tranchesFile = new File(name + ".tranches")
- val vqsrRscript = name + ".vqsr.r"
- val recalFile = new File(name + ".tranches.recal")
+ val recalibratedSnpVCF = new File(name + ".snp.recalibrated.vcf")
+ val recalibratedIndelVCF = new File(name + ".indel.recalibrated.vcf")
+ val tranchesSnpFile = new File(name + ".snp.tranches")
+ val tranchesIndelFile = new File(name + ".indel.tranches")
+ val vqsrSnpRscript = name + ".snp.vqsr.r"
+ val vqsrIndelRscript = name + ".indel.vqsr.r"
+ val recalSnpFile = new File(name + ".snp.tranches.recal")
+ val recalIndelFile = new File(name + ".indel.tranches.recal")
val goldStandardRecalibratedVCF = new File(name + "goldStandard.recalibrated.vcf")
val goldStandardTranchesFile = new File(name + "goldStandard.tranches")
val goldStandardRecalFile = new File(name + "goldStandard.tranches.recal")
@@ -88,6 +92,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
val training_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.highQuality.vcf"
val badSites_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/phase1.wgs.projectConsensus.v2b.recal.terrible.vcf"
val projectConsensus_1000G = "/humgen/1kg/processing/official_release/phase1/projectConsensus/ALL.wgs.projectConsensus_v2b.20101123.snps.sites.vcf"
+ val indelGoldStandardCallset = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf"
val lowPass: Boolean = true
val exome: Boolean = true
@@ -101,69 +106,69 @@ class MethodsDevelopmentCallingPipeline extends QScript {
"NA12878_gold" -> new Target("NA12878.goldStandard", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/data/goldStandard.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** There is no gold standard for the gold standard **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, lowPass, !exome, 391),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, lowPass, !exome, 391),
"NA12878_wgs_b37" -> new Target("NA12878.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"),
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"NA12878_wgs_decoy" -> new Target("NA12878.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.NA12878.clean.dedup.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"NA12878_wgs_hg18" -> new Target("NA12878.HiSeq.WGS.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask",
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam"),
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf"),
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 2.14, 99.0, !lowPass, !exome, 1),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"NA12878_wex_b37" -> new Target("NA12878.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/seq/picard_aggregation/C339/NA12878/v3/NA12878.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
"NA12878_wex_hg18" -> new Target("NA12878.HiSeq.WEx.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.indels.10.mask",
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.WEx.cleaned.recal.bam"),
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.vcf"),
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
"NA12878_wex_decoy" -> new Target("NA12878.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.NA12878.clean.dedup.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 1),
"CEUTrio_wex_b37" -> new Target("CEUTrio.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.bwa.cleaned.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"),
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 3),
"CEUTrio_wgs_b37" -> new Target("CEUTrio.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 3),
"CEUTrio_wex_decoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 98.0, !lowPass, exome, 3),
"CEUTrio_wgs_decoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 3),
"GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/NA12878/analysis/snps/NA12878.GA2.hg19.filtered.vcf"),
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 90.0, 99.0, !lowPass, !exome, 1),
"FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"),
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, 99.0, lowPass, !exome, 79),
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 90.0, 99.0, lowPass, !exome, 79),
"TGPWExGdA" -> new Target("1000G.WEx.GdA", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/Barcoded_1000G_WEx_Reduced_Plate_1.cleaned.list"), // BUGBUG: reduce from 60 to 20 people
new File("/humgen/gsa-scr1/delangel/NewUG/calls/AugustRelease.filtered_Q50_QD5.0_SB0.0.allSamples.SNPs_hg19.WEx_UG_newUG_MQC.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 2.6, 99.0, !lowPass, exome, 96),
+ "/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 90.0, 99.0, !lowPass, exome, 96),
"LowPassN60" -> new Target("lowpass.N60", b36, dbSNP_b36, hapmap_b36, indelMask_b36,
new File("/humgen/1kg/analysis/bamsForDataProcessingPapers/lowpass_b36/lowpass.chr20.cleaned.matefixed.bam"), // the bam list to call from
new File("/home/radon01/depristo/work/oneOffProjects/VQSRCutByNRS/lowpass.N60.chr20.filtered.vcf"), // the gold standard VCF file to run through the VQSR
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.b36.intervals", 2.3, 99.0, lowPass, !exome, 60), // chunked interval list to use with Queue's scatter/gather functionality
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.b36.intervals", 90.0, 99.0, lowPass, !exome, 60), // chunked interval list to use with Queue's scatter/gather functionality
"LowPassEUR363Nov" -> new Target("EUR.nov2010", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/EUR.363sample.Nov2010.chr20.bam"),
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
- "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 2.3, 99.0, lowPass, !exome, 363)
+ "/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.chr20.hg19.intervals", 90.0, 99.0, lowPass, !exome, 363)
)
@@ -181,15 +186,15 @@ class MethodsDevelopmentCallingPipeline extends QScript {
val goldStandard = true
for (target <- targets) {
if( !skipCalling ) {
- if (!noIndels) add(new indelCall(target), new indelFilter(target), new indelEvaluation(target))
+ if (!noIndels) add(new indelCall(target), new indelRecal(target), new indelCut(target), new indelEvaluation(target))
add(new snpCall(target))
- add(new VQSR(target, !goldStandard))
- add(new applyVQSR(target, !goldStandard))
+ add(new snpRecal(target, !goldStandard))
+ add(new snpCut(target, !goldStandard))
add(new snpEvaluation(target))
}
if ( runGoldStandard ) {
- add(new VQSR(target, goldStandard))
- add(new applyVQSR(target, goldStandard))
+ add(new snpRecal(target, goldStandard))
+ add(new snpCut(target, goldStandard))
}
}
}
@@ -222,7 +227,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
this.min_base_quality_score = minimumBaseQuality
if (qscript.deletions >= 0)
this.max_deletion_fraction = qscript.deletions
- this.out = t.rawVCF
+ this.out = t.rawSnpVCF
this.glm = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
this.baq = if (noBAQ || t.isExome) {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.OFF} else {org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY}
this.analysisName = t.name + "_UGs"
@@ -257,79 +262,114 @@ class MethodsDevelopmentCallingPipeline extends QScript {
this.jobName = queueLogDir + t.name + ".indelfilter"
}
- // 3.) Variant Quality Score Recalibration - Generate Recalibration table
- class VQSR(t: Target, goldStandard: Boolean) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
+ class VQSRBase(t: Target) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
this.nt = 2
this.reference_sequence = t.reference
this.intervalsString ++= List(t.intervals)
- this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
- this.resource :+= new TaggedFile( t.hapmapFile, "training=true,truth=true,prior=15.0" )
- this.resource :+= new TaggedFile( omni_b37, "training=true,truth=true,prior=12.0" )
- this.resource :+= new TaggedFile( training_1000G, "training=true,prior=10.0" )
+ this.allPoly = true
+ this.tranche ++= List("100.0", "99.9", "99.5", "99.3", "99.0", "98.9", "98.8", "98.5", "98.4", "98.3", "98.2", "98.1", "98.0", "97.9", "97.8", "97.5", "97.0", "95.0", "90.0")
+ }
+
+ class snpRecal(t: Target, goldStandard: Boolean) extends VQSRBase(t) with UNIVERSAL_GATK_ARGS {
+ this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawSnpVCF } )
+ this.resource :+= new TaggedFile( t.hapmapFile, "known=false,training=true,truth=true,prior=15.0" )
+ this.resource :+= new TaggedFile( omni_b37, "known=false,training=true,truth=true,prior=12.0" )
+ this.resource :+= new TaggedFile( training_1000G, "known=false,training=true,prior=10.0" )
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
- if(t.nSamples >= 10) { // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
- this.use_annotation ++= List("InbreedingCoeff")
- }
- if(!t.isExome) {
+ if(t.nSamples >= 10)
+ this.use_annotation ++= List("InbreedingCoeff") // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
+ if(!t.isExome)
this.use_annotation ++= List("DP")
- } else { // exome specific parameters
+ else { // exome specific parameters
this.resource :+= new TaggedFile( badSites_1000G, "bad=true,prior=2.0" )
this.mG = 6
- if(t.nSamples <= 3) { // very few exome samples means very few variants
+ if(t.nSamples <= 3) { // very few exome samples means very few variants
this.mG = 4
this.percentBad = 0.04
}
}
- this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile }
- this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
- this.allPoly = true
- this.tranche ++= List("100.0", "99.9", "99.5", "99.3", "99.0", "98.9", "98.8", "98.5", "98.4", "98.3", "98.2", "98.1", "98.0", "97.9", "97.8", "97.5", "97.0", "95.0", "90.0")
- this.rscript_file = t.vqsrRscript
- this.analysisName = t.name + "_VQSR"
- this.jobName = queueLogDir + t.name + ".VQSR"
+ this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesSnpFile }
+ this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalSnpFile }
+ this.rscript_file = t.vqsrSnpRscript
+ this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
+ this.analysisName = t.name + "_VQSRs"
+ this.jobName = queueLogDir + t.name + ".snprecal"
}
+ class indelRecal(t: Target) extends VQSRBase(t) with UNIVERSAL_GATK_ARGS {
+ this.input :+= t.rawIndelVCF
+ this.resource :+= new TaggedFile( indelGoldStandardCallset, "known=true,training=true,truth=true,prior=12.0" )
+ this.use_annotation ++= List("QD", "HaplotypeScore", "ReadPosRankSum", "FS")
+ if(t.nSamples >= 10)
+ this.use_annotation ++= List("InbreedingCoeff") // InbreedingCoeff is a population-wide statistic that requires at least 10 samples to calculate
+ this.tranches_file = t.tranchesIndelFile
+ this.recal_file = t.recalIndelFile
+ this.rscript_file = t.vqsrIndelRscript
+ this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
+ this.analysisName = t.name + "_VQSRi"
+ this.jobName = queueLogDir + t.name + ".indelrecal"
+ }
+
+
// 4.) Apply the recalibration table to the appropriate tranches
- class applyVQSR (t: Target, goldStandard: Boolean) extends ApplyRecalibration with UNIVERSAL_GATK_ARGS {
+ class applyVQSRBase (t: Target) extends ApplyRecalibration with UNIVERSAL_GATK_ARGS {
this.memoryLimit = 6
this.reference_sequence = t.reference
this.intervalsString ++= List(t.intervals)
- this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
- this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile}
- this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
- this.ts_filter_level = t.trancheTarget
- this.out = t.recalibratedVCF
- this.analysisName = t.name + "_AVQSR"
- this.jobName = queueLogDir + t.name + ".applyVQSR"
}
+ class snpCut (t: Target, goldStandard: Boolean) extends applyVQSRBase(t) {
+ this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawSnpVCF } )
+ this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesSnpFile}
+ this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalSnpFile }
+ this.ts_filter_level = t.snpTrancheTarget
+ this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
+ this.out = t.recalibratedSnpVCF
+ this.analysisName = t.name + "_AVQSRs"
+ this.jobName = queueLogDir + t.name + ".snpcut"
+ }
+
+ class indelCut (t: Target) extends applyVQSRBase(t) {
+ this.input :+= t.rawIndelVCF
+ this.tranches_file = t.tranchesIndelFile
+ this.recal_file = t.recalIndelFile
+ this.ts_filter_level = t.indelTranchTarget
+ this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
+ this.out = t.recalibratedIndelVCF
+ this.analysisName = t.name + "_AVQSRi"
+ this.jobName = queueLogDir + t.name + ".indelcut"
+ }
+
+
// 5.) Variant Evaluation Base(OPTIONAL)
class EvalBase(t: Target) extends VariantEval with UNIVERSAL_GATK_ARGS {
this.memoryLimit = 3
- this.reference_sequence = t.reference
this.comp :+= new TaggedFile(t.hapmapFile, "hapmap" )
- this.intervalsString ++= List(t.intervals)
this.D = new File(t.dbsnpFile)
+ this.reference_sequence = t.reference
+ this.intervalsString ++= List(t.intervals)
this.sample = samples
}
// 5a.) SNP Evaluation (OPTIONAL) based on the cut vcf
class snpEvaluation(t: Target) extends EvalBase(t) {
if (t.reference == b37 || t.reference == hg19) this.comp :+= new TaggedFile( omni_b37, "omni" )
- this.eval :+= t.recalibratedVCF
+ this.eval :+= t.recalibratedSnpVCF
this.out = t.evalFile
this.analysisName = t.name + "_VEs"
- this.jobName = queueLogDir + t.name + ".snp.eval"
+ this.jobName = queueLogDir + t.name + ".snpeval"
}
// 5b.) Indel Evaluation (OPTIONAL)
class indelEvaluation(t: Target) extends EvalBase(t) {
- this.eval :+= t.filteredIndelVCF
- this.evalModule :+= "IndelStatistics"
+ this.eval :+= t.recalibratedIndelVCF
+ this.comp :+= new TaggedFile(indelGoldStandardCallset, "indelGS" )
+ this.noEV = true
+ this.evalModule = List("CompOverlap", "CountVariants", "TiTvVariantEvaluator", "ValidationReport", "IndelStatistics")
this.out = t.evalIndelFile
this.analysisName = t.name + "_VEi"
- this.jobName = queueLogDir + queueLogDir + t.name + ".indel.eval"
+ this.jobName = queueLogDir + queueLogDir + t.name + ".indeleval"
}
}
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
index 4896eaed3..2f954713e 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala
@@ -53,9 +53,9 @@ class PacbioProcessingPipeline extends QScript {
val queueLogDir: String = ".qlog/"
- def script = {
+ def script() {
- val fileList: List[File] = QScriptUtils.createListFromFile(input)
+ val fileList: Seq[File] = QScriptUtils.createSeqFromFile(input)
for (file: File <- fileList) {
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
index 32913deb4..7a22e700b 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.io.IOUtils
import org.broadinstitute.sting.utils.help.ApplicationDetails
import java.util.{ResourceBundle, Arrays}
import org.broadinstitute.sting.utils.text.TextFormattingUtils
+import org.apache.commons.io.FilenameUtils
/**
* Entry point of Queue. Compiles and runs QScripts passed in to the command line.
@@ -61,6 +62,7 @@ object QCommandLine extends Logging {
CommandLineProgram.start(qCommandLine, argv)
try {
Runtime.getRuntime.removeShutdownHook(shutdownHook)
+ qCommandLine.shutdown()
} catch {
case _ => /* ignore, example 'java.lang.IllegalStateException: Shutdown in progress' */
}
@@ -78,10 +80,10 @@ object QCommandLine extends Logging {
class QCommandLine extends CommandLineProgram with Logging {
@Input(fullName="script", shortName="S", doc="QScript scala file", required=true)
@ClassType(classOf[File])
- private var scripts = List.empty[File]
+ var scripts = Seq.empty[File]
@ArgumentCollection
- private val settings = new QGraphSettings
+ val settings = new QGraphSettings
private val qScriptManager = new QScriptManager
private val qGraph = new QGraph
@@ -91,7 +93,7 @@ class QCommandLine extends CommandLineProgram with Logging {
private lazy val pluginManager = {
qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory)
qScriptManager.loadScripts(scripts, qScriptClasses)
- new PluginManager[QScript](classOf[QScript], List(qScriptClasses.toURI.toURL))
+ new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
}
QFunction.parsingEngine = new ParsingEngine(this)
@@ -101,12 +103,16 @@ class QCommandLine extends CommandLineProgram with Logging {
* functions, and then builds and runs a QGraph based on the dependencies.
*/
def execute = {
+ if (settings.qSettings.runName == null)
+ settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
+
qGraph.settings = settings
val allQScripts = pluginManager.createAllTypes();
for (script <- allQScripts) {
logger.info("Scripting " + pluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
loadArgumentsIntoObject(script)
+ script.qSettings = settings.qSettings
try {
script.script()
} catch {
@@ -120,22 +126,34 @@ class QCommandLine extends CommandLineProgram with Logging {
// Execute the job graph
qGraph.run()
+ val functionsAndStatus = qGraph.getFunctionsAndStatus
+ val success = qGraph.success
+
// walk over each script, calling onExecutionDone
for (script <- allQScripts) {
- script.onExecutionDone(qGraph.getFunctionsAndStatus(script.functions), qGraph.success)
- if ( ! settings.disableJobReport ) {
- val jobStringName = (QScriptUtils.?(settings.jobReportFile)).getOrElse(settings.qSettings.jobNamePrefix + ".jobreport.txt")
+ val scriptFunctions = functionsAndStatus.filterKeys(f => script.functions.contains(f))
+ script.onExecutionDone(scriptFunctions, success)
+ }
- if (!shuttingDown) {
- val reportFile = new File(jobStringName)
- logger.info("Writing JobLogging GATKReport to file " + reportFile)
- QJobReport.printReport(qGraph.getFunctionsAndStatus(script.functions), reportFile)
+ logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", functionsAndStatus.size))
- if ( settings.run ) {
- val pdfFile = new File(jobStringName + ".pdf")
- logger.info("Plotting JobLogging GATKReport to file " + pdfFile)
- QJobReport.plotReport(reportFile, pdfFile)
- }
+ if (!settings.disableJobReport) {
+ val jobStringName = {
+ if (settings.jobReportFile != null)
+ settings.jobReportFile
+ else
+ settings.qSettings.runName + ".jobreport.txt"
+ }
+
+ if (!shuttingDown) {
+ val reportFile = IOUtils.absolute(settings.qSettings.runDirectory, jobStringName)
+ logger.info("Writing JobLogging GATKReport to file " + reportFile)
+ QJobReport.printReport(functionsAndStatus, reportFile)
+
+ if (settings.run) {
+ val pdfFile = IOUtils.absolute(settings.qSettings.runDirectory, FilenameUtils.removeExtension(jobStringName) + ".pdf")
+ logger.info("Plotting JobLogging GATKReport to file " + pdfFile)
+ QJobReport.plotReport(reportFile, pdfFile)
}
}
}
@@ -179,20 +197,20 @@ class QCommandLine extends CommandLineProgram with Logging {
override def getApplicationDetails : ApplicationDetails = {
new ApplicationDetails(createQueueHeader(),
- List.empty[String],
+ Seq.empty[String],
ApplicationDetails.createDefaultRunningInstructions(getClass.asInstanceOf[Class[CommandLineProgram]]),
"")
}
- private def createQueueHeader() : List[String] = {
- List(String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp),
- "Copyright (c) 2011 The Broad Institute",
+ private def createQueueHeader() : Seq[String] = {
+ Seq(String.format("Queue v%s, Compiled %s", getQueueVersion, getBuildTimestamp),
+ "Copyright (c) 2012 The Broad Institute",
"Please view our documentation at http://www.broadinstitute.org/gsa/wiki",
"For support, please view our support site at http://getsatisfaction.com/gsa")
}
private def getQueueVersion : String = {
- var stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
+ val stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
if ( stingResources.containsKey("org.broadinstitute.sting.queue.QueueVersion.version") ) {
stingResources.getString("org.broadinstitute.sting.queue.QueueVersion.version")
@@ -203,7 +221,7 @@ class QCommandLine extends CommandLineProgram with Logging {
}
private def getBuildTimestamp : String = {
- var stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
+ val stingResources : ResourceBundle = TextFormattingUtils.loadResourceBundle("StingText")
if ( stingResources.containsKey("build.timestamp") ) {
stingResources.getString("build.timestamp")
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
index fce65c997..6f887ea00 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
@@ -27,7 +27,6 @@ package org.broadinstitute.sting.queue
import engine.JobRunInfo
import org.broadinstitute.sting.queue.function.QFunction
import annotation.target.field
-import io.Source
import util.{StringFileConversions, PrimitiveOptionConversions, Logging}
/**
@@ -53,6 +52,11 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
type ArgumentCollection = org.broadinstitute.sting.commandline.ArgumentCollection @field
type Gather = org.broadinstitute.sting.commandline.Gather @field
+ /**
+ * Default settings for QFunctions
+ */
+ var qSettings: QSettings = _
+
/**
* Builds the CommandLineFunctions that will be used to run this script and adds them to this.functions directly or using the add() utility method.
*/
@@ -60,18 +64,14 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
/**
* A default handler for the onExecutionDone() function. By default this doesn't do anything
- * except print out a fine status message.
*/
def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) {
- logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", jobs.size))
- // this is too much output
- // for ( (f, info) <- jobs ) logger.info(" %s %s".format(f.jobName, info))
}
/**
* The command line functions that will be executed for this QScript.
*/
- var functions = List.empty[QFunction]
+ var functions = Seq.empty[QFunction]
/**
* Exchanges the extension on a file.
@@ -98,22 +98,20 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
* Adds one or more command line functions to be run.
* @param functions Functions to add.
*/
- def add(functions: QFunction*) = {
+ def add(functions: QFunction*) {
functions.foreach(function => function.addOrder = QScript.nextAddOrder)
this.functions ++= functions
}
- def addAll(functions: List[QFunction]) {
+ def addAll(functions: Seq[QFunction]) {
functions.foreach( f => add(f) )
}
-
- def extractFileEntries(in: File): List[File] = Source.fromFile(in).getLines().toList
}
object QScript {
private var addOrder = 0
private def nextAddOrder = {
addOrder += 1
- List(addOrder)
+ Seq(addOrder)
}
}
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala
index 512a9f8dd..74487917f 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScriptManager.scala
@@ -20,7 +20,7 @@ class QScriptManager() extends Logging {
* Compiles and loads the scripts in the files into the current classloader.
* Heavily based on scala/src/compiler/scala/tools/ant/Scalac.scala
*/
- def loadScripts(scripts: List[File], tempDir: File) {
+ def loadScripts(scripts: Seq[File], tempDir: File) {
if (scripts.size > 0) {
val settings = new Settings((error: String) => logger.error(error))
settings.deprecation.value = true
@@ -36,7 +36,7 @@ class QScriptManager() extends Logging {
logger.info("Compiling %s QScript%s".format(scripts.size, plural(scripts.size)))
logger.debug("Compilation directory: " + settings.outdir.value)
- run.compileFiles(scripts.map(new PlainFile(_)))
+ run.compileFiles(scripts.toList.map(new PlainFile(_)))
reporter.printSummary()
if (reporter.hasErrors) {
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
index e8ac26a57..d9fed4ce8 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@@ -25,15 +25,14 @@
package org.broadinstitute.sting.queue
import java.io.File
-import org.broadinstitute.sting.commandline.{ArgumentCollection, Argument}
-import org.broadinstitute.sting.queue.util.{SystemUtils, EmailSettings}
+import org.broadinstitute.sting.commandline.Argument
/**
* Default settings settable on the command line and passed to CommandLineFunctions.
*/
class QSettings {
- @Argument(fullName="job_name_prefix", shortName="jobPrefix", doc="Default name prefix for compute farm jobs.", required=false)
- var jobNamePrefix: String = QSettings.processNamePrefix
+ @Argument(fullName="run_name", shortName="runName", doc="A name for this run used for various status messages.", required=false)
+ var runName: String = _
@Argument(fullName="job_project", shortName="jobProject", doc="Default project for compute farm jobs.", required=false)
var jobProject: String = _
@@ -45,13 +44,13 @@ class QSettings {
var jobPriority: Option[Int] = None
@Argument(fullName="job_native_arg", shortName="jobNative", doc="Native arguments to pass to the job runner.", required=false)
- var jobNativeArgs: List[String] = Nil
+ var jobNativeArgs: Seq[String] = Nil
@Argument(fullName="job_resource_request", shortName="jobResReq", doc="Resource requests to pass to the job runner.", required=false)
- var jobResourceRequests: List[String] = Nil
+ var jobResourceRequests: Seq[String] = Nil
@Argument(fullName="job_environment_name", shortName="jobEnv", doc="Environment names for the job runner.", required=false)
- var jobEnvironmentNames: List[String] = Nil
+ var jobEnvironmentNames: Seq[String] = Nil
@Argument(fullName="memory_limit", shortName="memLimit", doc="Default memory limit for jobs, in gigabytes.", required=false)
var memoryLimit: Option[Double] = None
@@ -77,15 +76,4 @@ class QSettings {
@Argument(fullName="job_scatter_gather_directory", shortName="jobSGDir", doc="Default directory to place scatter gather output for compute farm jobs.", required=false)
var jobScatterGatherDirectory: File = _
-
- @ArgumentCollection
- val emailSettings = new EmailSettings
-}
-
-/**
- * Default settings settable on the command line and passed to CommandLineFunctions.
- */
-object QSettings {
- /** A semi-unique job prefix using the host name and the process id. */
- private val processNamePrefix = "Q-" + SystemUtils.pidAtHost
}
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala
index 55ed94267..8225d28ab 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/FunctionEdge.scala
@@ -1,3 +1,27 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.queue.engine
import org.broadinstitute.sting.queue.function.QFunction
@@ -28,15 +52,18 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
val myRunInfo: JobRunInfo = JobRunInfo.default // purely for dryRun testing
+ /**
+ * When using reset status this variable tracks the old status
+ */
+ var resetFromStatus: RunnerStatus.Value = null
+
/**
* Initializes with the current status of the function.
*/
private var currentStatus = {
- val isDone = function.isDone
- val isFail = function.isFail
- if (isFail.isDefined && isFail.get)
+ if (function.isFail)
RunnerStatus.FAILED
- else if (isDone.isDefined && isDone.get)
+ else if (function.isDone)
RunnerStatus.DONE
else
RunnerStatus.PENDING
@@ -136,13 +163,15 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
* Resets the edge to pending status.
*/
def resetToPending(cleanOutputs: Boolean) {
+ if (resetFromStatus == null)
+ resetFromStatus = currentStatus
currentStatus = RunnerStatus.PENDING
if (cleanOutputs)
function.deleteOutputs()
runner = null
}
- override def dotString = function.dotString
+ override def shortDescription = function.shortDescription
/**
* Returns the path to the file to use for logging errors.
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala
index d006cde4b..be5622360 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/InProcessRunner.scala
@@ -3,7 +3,8 @@ package org.broadinstitute.sting.queue.engine
import org.broadinstitute.sting.queue.function.InProcessFunction
import java.util.Date
import org.broadinstitute.sting.utils.Utils
-import org.apache.commons.io.FileUtils
+import org.apache.commons.io.{IOUtils, FileUtils}
+import java.io.PrintStream
/**
* Runs a function that executes in process and does not fork out an external process.
@@ -16,12 +17,24 @@ class InProcessRunner(val function: InProcessFunction) extends JobRunner[InProce
getRunInfo.exechosts = Utils.resolveHostname()
runStatus = RunnerStatus.RUNNING
- function.run()
+ function.jobOutputStream = new PrintStream(FileUtils.openOutputStream(function.jobOutputFile))
+ function.jobErrorStream = {
+ if (function.jobErrorFile != null)
+ new PrintStream(FileUtils.openOutputStream(function.jobErrorFile))
+ else
+ function.jobOutputStream
+ }
+ try {
+ function.run()
+ function.jobOutputStream.println("%s%nDone.".format(function.description))
+ } finally {
+ IOUtils.closeQuietly(function.jobOutputStream)
+ if (function.jobErrorFile != null)
+ IOUtils.closeQuietly(function.jobErrorStream)
+ }
- getRunInfo.doneTime = new Date()
- val content = "%s%nDone.".format(function.description)
- FileUtils.writeStringToFile(function.jobOutputFile, content)
runStatus = RunnerStatus.DONE
+ getRunInfo.doneTime = new Date()
}
def status = runStatus
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/MappingEdge.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/MappingEdge.scala
index 1d56009f3..17f0561fa 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/MappingEdge.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/MappingEdge.scala
@@ -1,3 +1,27 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
package org.broadinstitute.sting.queue.engine
/**
@@ -10,5 +34,5 @@ class MappingEdge(val inputs: QNode, val outputs: QNode) extends QEdge {
* @return