diff --git a/ivy.xml b/ivy.xml
index f90b9a010..96c1de844 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -46,7 +46,7 @@
-
+
@@ -75,6 +75,9 @@
+
+
+
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
index 92e7c6554..6a057a456 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
@@ -354,7 +354,7 @@ public class VariantEvalUtils {
private void addMapping(HashMap> mappings, String sample, VariantContext vc) {
if ( !mappings.containsKey(sample) )
- mappings.put(sample, new HashSet());
+ mappings.put(sample, new LinkedHashSet());
mappings.get(sample).add(vc);
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
index bf9a3b03e..9b625fb04 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@@ -92,7 +92,7 @@ import java.util.*;
* -o output.vcf \
* -sn SAMPLE_1_PARC \
* -sn SAMPLE_1_ACTG \
- * -sn 'SAMPLE.+PARC'
+ * -se 'SAMPLE.+PARC'
*
* Select any sample that matches a regular expression and sites where the QD annotation is more than 10:
* java -Xmx2g -jar GenomeAnalysisTK.jar \
@@ -100,7 +100,7 @@ import java.util.*;
* -T SelectVariants \
* --variant input.vcf \
* -o output.vcf \
- * -sn 'SAMPLE.+PARC'
+ * -se 'SAMPLE.+PARC'
* -select "QD > 10.0"
*
* Select a sample and exclude non-variant loci and filtered loci:
diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
index 04cbef0c3..f24bbb636 100644
--- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
@@ -35,8 +35,10 @@ import org.reflections.scanners.SubTypesScanner;
import org.reflections.util.ConfigurationBuilder;
import org.slf4j.LoggerFactory;
+import java.io.File;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
+import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.*;
@@ -57,8 +59,25 @@ public class PluginManager {
Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Reflections.class);
logger.setLevel(Level.OFF);
+ Set classPathUrls = new LinkedHashSet();
+
+ URL cwd;
+ try {
+ cwd = new File(".").getAbsoluteFile().toURI().toURL();
+ } catch (MalformedURLException e) {
+ throw new RuntimeException(e);
+ }
+
+ // NOTE: Reflections also scans directories for classes.
+ // Meanwhile some of the jar MANIFEST.MF Bundle-ClassPath properties contain "."
+ // Do NOT let reflections scan the CWD where it often picks up test classes when
+ // they weren't explicitly in the classpath, for example the UninstantiableWalker
+ for (URL url: JVMUtils.getClasspathURLs())
+ if (!url.equals(cwd))
+ classPathUrls.add(url);
+
defaultReflections = new Reflections( new ConfigurationBuilder()
- .setUrls(JVMUtils.getClasspathURLs())
+ .setUrls(classPathUrls)
.setScanners(new SubTypesScanner()));
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java
index 81d00d9d7..be7dc52f7 100644
--- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java
@@ -249,7 +249,7 @@ public class ClippingOp {
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
private SAMRecord hardClip (SAMRecord read, int start, int stop) {
- if (start == 0 && stop == read.getReadLength() -1)
+ if (start == 0 && stop == read.getReadLength() - 1)
return new SAMRecord(read.getHeader());
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java
index 3b83617cf..83db20238 100644
--- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java
@@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -68,25 +69,15 @@ public class ReadClipper {
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
- int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
- int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
+ int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
+ int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
- if ( start > stop ) {
-// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
+ if ( start > stop )
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
- }
- //This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
- //an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
-// stop -= numDeletions(read);
-// if ( start > stop )
-// start -= numDeletions(read);
-
-
- //System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop));
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java
new file mode 100644
index 000000000..02512c8dc
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java
@@ -0,0 +1,46 @@
+package org.broadinstitute.sting.utils.sam;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import net.sf.samtools.SAMRecord;
+
+import java.util.Comparator;
+
+public class AlignmentStartWithNoTiesComparator implements Comparator {
+ @Requires("c1 >= 0 && c2 >= 0")
+ @Ensures("result == 0 || result == 1 || result == -1")
+ private int compareContigs(int c1, int c2) {
+ if (c1 == c2)
+ return 0;
+ else if (c1 > c2)
+ return 1;
+ return -1;
+ }
+
+ @Requires("r1 != null && r2 != null")
+ @Ensures("result == 0 || result == 1 || result == -1")
+ public int compare(SAMRecord r1, SAMRecord r2) {
+ int result;
+
+ if (r1 == r2)
+ result = 0;
+
+ else if (r1.getReadUnmappedFlag())
+ result = 1;
+ else if (r2.getReadUnmappedFlag())
+ result = -1;
+ else {
+ final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
+
+ if (cmpContig != 0)
+ result = cmpContig;
+
+ else {
+ if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1;
+ else result = 1;
+ }
+ }
+
+ return result;
+ }
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
index 8beb1b21a..14ebbaa6f 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
@@ -780,11 +780,56 @@ public class ReadUtils {
}
+ public enum ClippingTail {
+ LEFT_TAIL,
+ RIGHT_TAIL
+ }
+
+ /**
+ * Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in
+ * a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
+ * the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
+ * deletion.
+ *
+ * @param read
+ * @param refCoord
+ * @param tail
+ * @return the read coordinate corresponding to the requested reference coordinate for clipping.
+ */
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result >= 0", "result < read.getReadLength()"})
- public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
+ public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) {
+ Pair result = getReadCoordinateForReferenceCoordinate(read, refCoord);
+ int readCoord = result.getFirst();
+
+ if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
+ readCoord++;
+
+ return readCoord;
+ }
+
+ /**
+ * Returns the read coordinate corresponding to the requested reference coordinate.
+ *
+ * WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
+ * will return the last read base before the deletion. This function returns a
+ * Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
+ * a deletion.
+ *
+ * SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a
+ * pre-processed result according to normal clipping needs. Or you can use this function and tailor the
+ * behavior to your needs.
+ *
+ * @param read
+ * @param refCoord
+ * @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
+ */
+ @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
+ @Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
+ public static Pair getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
int readBases = 0;
int refBases = 0;
+ boolean fallsInsideDeletion = false;
if (refCoord < read.getAlignmentStart()) {
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
@@ -806,26 +851,56 @@ public class ReadUtils {
int shift = 0;
if (cigarElement.getOperator().consumesReferenceBases()) {
- if (refBases + cigarElement.getLength() < goal) {
+ if (refBases + cigarElement.getLength() < goal)
shift = cigarElement.getLength();
- }
- else {
+ else
shift = goal - refBases;
- }
+
refBases += shift;
}
goalReached = refBases == goal;
- if (cigarElement.getOperator().consumesReadBases()) {
- readBases += goalReached ? shift : cigarElement.getLength();
- }
+ if (!goalReached && cigarElement.getOperator().consumesReadBases())
+ readBases += cigarElement.getLength();
+
+ if (goalReached) {
+ // Is this base's reference position within this cigar element? Or did we use it all?
+ boolean endsWithinCigar = shift < cigarElement.getLength();
+
+ // If it isn't, we need to check the next one. There should *ALWAYS* be a next one
+ // since we checked if the goal coordinate is within the read length, so this is just a sanity check.
+ if (!endsWithinCigar && !cigarElementIterator.hasNext())
+ throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
+
+ CigarElement nextCigarElement;
+
+ // if we end inside the current cigar element, we just have to check if it is a deletion
+ if (endsWithinCigar)
+ fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
+
+ // if we end outside the current cigar element, we need to check if the next element is a deletion.
+ else {
+ nextCigarElement = cigarElementIterator.next();
+ fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
+ }
+
+ // If we reached our goal outside a deletion, add the shift
+ if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
+ readBases += shift;
+
+ // If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
+ // to add the shift of the current cigar element but go back to it's last element to return the last
+ // base before the deletion (see warning in function contracts)
+ else if (fallsInsideDeletion && !endsWithinCigar)
+ readBases += shift - 1;
+ }
}
if (!goalReached)
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
}
- return readBases;
+ return new Pair(readBases, fallsInsideDeletion);
}
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
@@ -871,4 +946,19 @@ public class ReadUtils {
return rec;
}
+
+ /**
+ * Compares two SAMRecords only the basis on alignment start. Note that
+ * comparisons are performed ONLY on the basis of alignment start; any
+ * two SAM records with the same alignment start will be considered equal.
+ *
+ * Unmapped alignments will all be considered equal.
+ */
+
+ @Requires({"read1 != null", "read2 != null"})
+ @Ensures("result == 0 || result == 1 || result == -1")
+ public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) {
+ AlignmentStartComparator comp = new AlignmentStartComparator();
+ return comp.compare(read1, read2);
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java
index 9d4b23a8b..c146bf4d4 100644
--- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java
@@ -80,7 +80,7 @@ public class ListFileUtils {
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
}
else {
- throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
+ throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM files with the .bam extension and lists of BAM files " +
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
"of BAM files is in the correct format, update the extension, and try again.",inputFileName));
}
diff --git a/public/packages/Queue.xml b/public/packages/Queue.xml
index 589cb45f5..c256e5687 100644
--- a/public/packages/Queue.xml
+++ b/public/packages/Queue.xml
@@ -59,7 +59,12 @@
-
+
+
+
+
+
+
diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala
new file mode 100644
index 000000000..62f66ec06
--- /dev/null
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.extensions.snpeff
+
+import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
+import java.io.File
+import org.broadinstitute.sting.commandline.{Argument, Output, Input}
+
+/**
+ * Basic snpEff support.
+ * See: http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator
+ */
+class SnpEff extends JavaCommandLineFunction {
+ javaMainClass = "ca.mcgill.mcb.pcingola.snpEffect.commandLine.SnpEff"
+
+ @Input(doc="snp vcf4 file")
+ var inVcf: File = _
+
+ @Input(doc="config file with path to data dir", required=false)
+ var config: File = _
+
+ @Argument(doc="genome version")
+ var genomeVersion: String = _
+
+ @Argument(doc="verbose", required=false)
+ var verbose = true
+
+ @Output(doc="snp eff output")
+ var outVcf: File = _
+
+ override def commandLine = Array(
+ super.commandLine,
+ " eff",
+ if (verbose) " -v" else "",
+ optional(" -c ", config),
+ " -i vcf -o vcf %s %s > %s".format(genomeVersion, inVcf, outVcf)
+ ).mkString
+}
diff --git a/settings/repository/net.sf.snpeff/snpeff-2.0.2.jar b/settings/repository/net.sf.snpeff/snpeff-2.0.2.jar
new file mode 100755
index 000000000..bfd06f97f
Binary files /dev/null and b/settings/repository/net.sf.snpeff/snpeff-2.0.2.jar differ
diff --git a/settings/repository/net.sf.snpeff/snpeff-2.0.2.xml b/settings/repository/net.sf.snpeff/snpeff-2.0.2.xml
new file mode 100644
index 000000000..f0568def4
--- /dev/null
+++ b/settings/repository/net.sf.snpeff/snpeff-2.0.2.xml
@@ -0,0 +1,3 @@
+
+
+