diff --git a/ivy.xml b/ivy.xml index f90b9a010..96c1de844 100644 --- a/ivy.xml +++ b/ivy.xml @@ -46,7 +46,7 @@ - + @@ -75,6 +75,9 @@ + + + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 92e7c6554..6a057a456 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -354,7 +354,7 @@ public class VariantEvalUtils { private void addMapping(HashMap> mappings, String sample, VariantContext vc) { if ( !mappings.containsKey(sample) ) - mappings.put(sample, new HashSet()); + mappings.put(sample, new LinkedHashSet()); mappings.get(sample).add(vc); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index bf9a3b03e..9b625fb04 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -92,7 +92,7 @@ import java.util.*; * -o output.vcf \ * -sn SAMPLE_1_PARC \ * -sn SAMPLE_1_ACTG \ - * -sn 'SAMPLE.+PARC' + * -se 'SAMPLE.+PARC' * * Select any sample that matches a regular expression and sites where the QD annotation is more than 10: * java -Xmx2g -jar GenomeAnalysisTK.jar \ @@ -100,7 +100,7 @@ import java.util.*; * -T SelectVariants \ * --variant input.vcf \ * -o output.vcf \ - * -sn 'SAMPLE.+PARC' + * -se 'SAMPLE.+PARC' * -select "QD > 10.0" * * Select a sample and exclude non-variant loci and filtered loci: diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java index 04cbef0c3..f24bbb636 100644 --- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java @@ -35,8 +35,10 @@ import org.reflections.scanners.SubTypesScanner; import org.reflections.util.ConfigurationBuilder; import org.slf4j.LoggerFactory; +import java.io.File; import java.lang.reflect.Constructor; import java.lang.reflect.Method; +import java.net.MalformedURLException; import java.net.URL; import java.net.URLClassLoader; import java.util.*; @@ -57,8 +59,25 @@ public class PluginManager { Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Reflections.class); logger.setLevel(Level.OFF); + Set classPathUrls = new LinkedHashSet(); + + URL cwd; + try { + cwd = new File(".").getAbsoluteFile().toURI().toURL(); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + + // NOTE: Reflections also scans directories for classes. + // Meanwhile some of the jar MANIFEST.MF Bundle-ClassPath properties contain "." + // Do NOT let reflections scan the CWD where it often picks up test classes when + // they weren't explicitly in the classpath, for example the UninstantiableWalker + for (URL url: JVMUtils.getClasspathURLs()) + if (!url.equals(cwd)) + classPathUrls.add(url); + defaultReflections = new Reflections( new ConfigurationBuilder() - .setUrls(JVMUtils.getClasspathURLs()) + .setUrls(classPathUrls) .setScanners(new SubTypesScanner())); } diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java index 81d00d9d7..be7dc52f7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java @@ -249,7 +249,7 @@ public class ClippingOp { @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"}) private SAMRecord hardClip (SAMRecord read, int start, int stop) { - if (start == 0 && stop == read.getReadLength() -1) + if (start == 0 && stop == read.getReadLength() - 1) return new SAMRecord(read.getHeader()); // If the read is unmapped there is no Cigar string and neither should we create a new cigar string diff --git a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java index 3b83617cf..83db20238 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java @@ -4,6 +4,7 @@ import com.google.java.contract.Requires; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -68,25 +69,15 @@ public class ReadClipper { } private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) { - int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart); - int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop); + int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL); + int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL); if (start < 0 || stop > read.getReadLength() - 1) throw new ReviewedStingException("Trying to clip before the start or after the end of a read"); - if ( start > stop ) { -// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read)); + if ( start > stop ) throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!"); - } - //This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into - //an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read -// stop -= numDeletions(read); -// if ( start > stop ) -// start -= numDeletions(read); - - - //System.out.println("Clipping start/stop: " + start + "/" + stop); this.addOp(new ClippingOp(start, stop)); SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES); this.ops = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java new file mode 100644 index 000000000..02512c8dc --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentStartWithNoTiesComparator.java @@ -0,0 +1,46 @@ +package org.broadinstitute.sting.utils.sam; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import net.sf.samtools.SAMRecord; + +import java.util.Comparator; + +public class AlignmentStartWithNoTiesComparator implements Comparator { + @Requires("c1 >= 0 && c2 >= 0") + @Ensures("result == 0 || result == 1 || result == -1") + private int compareContigs(int c1, int c2) { + if (c1 == c2) + return 0; + else if (c1 > c2) + return 1; + return -1; + } + + @Requires("r1 != null && r2 != null") + @Ensures("result == 0 || result == 1 || result == -1") + public int compare(SAMRecord r1, SAMRecord r2) { + int result; + + if (r1 == r2) + result = 0; + + else if (r1.getReadUnmappedFlag()) + result = 1; + else if (r2.getReadUnmappedFlag()) + result = -1; + else { + final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex()); + + if (cmpContig != 0) + result = cmpContig; + + else { + if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1; + else result = 1; + } + } + + return result; + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index 8beb1b21a..14ebbaa6f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -780,11 +780,56 @@ public class ReadUtils { } + public enum ClippingTail { + LEFT_TAIL, + RIGHT_TAIL + } + + /** + * Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in + * a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns + * the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the + * deletion. + * + * @param read + * @param refCoord + * @param tail + * @return the read coordinate corresponding to the requested reference coordinate for clipping. + */ @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"}) @Ensures({"result >= 0", "result < read.getReadLength()"}) - public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { + public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) { + Pair result = getReadCoordinateForReferenceCoordinate(read, refCoord); + int readCoord = result.getFirst(); + + if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL) + readCoord++; + + return readCoord; + } + + /** + * Returns the read coordinate corresponding to the requested reference coordinate. + * + * WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function + * will return the last read base before the deletion. This function returns a + * Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with + * a deletion. + * + * SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a + * pre-processed result according to normal clipping needs. Or you can use this function and tailor the + * behavior to your needs. + * + * @param read + * @param refCoord + * @return the read coordinate corresponding to the requested reference coordinate. (see warning!) + */ + @Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"}) + @Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"}) + public static Pair getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) { int readBases = 0; int refBases = 0; + boolean fallsInsideDeletion = false; if (refCoord < read.getAlignmentStart()) { readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord); @@ -806,26 +851,56 @@ public class ReadUtils { int shift = 0; if (cigarElement.getOperator().consumesReferenceBases()) { - if (refBases + cigarElement.getLength() < goal) { + if (refBases + cigarElement.getLength() < goal) shift = cigarElement.getLength(); - } - else { + else shift = goal - refBases; - } + refBases += shift; } goalReached = refBases == goal; - if (cigarElement.getOperator().consumesReadBases()) { - readBases += goalReached ? shift : cigarElement.getLength(); - } + if (!goalReached && cigarElement.getOperator().consumesReadBases()) + readBases += cigarElement.getLength(); + + if (goalReached) { + // Is this base's reference position within this cigar element? Or did we use it all? + boolean endsWithinCigar = shift < cigarElement.getLength(); + + // If it isn't, we need to check the next one. There should *ALWAYS* be a next one + // since we checked if the goal coordinate is within the read length, so this is just a sanity check. + if (!endsWithinCigar && !cigarElementIterator.hasNext()) + throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio"); + + CigarElement nextCigarElement; + + // if we end inside the current cigar element, we just have to check if it is a deletion + if (endsWithinCigar) + fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION; + + // if we end outside the current cigar element, we need to check if the next element is a deletion. + else { + nextCigarElement = cigarElementIterator.next(); + fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION; + } + + // If we reached our goal outside a deletion, add the shift + if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases()) + readBases += shift; + + // If we reached our goal inside a deletion, but the deletion is the next cigar element then we need + // to add the shift of the current cigar element but go back to it's last element to return the last + // base before the deletion (see warning in function contracts) + else if (fallsInsideDeletion && !endsWithinCigar) + readBases += shift - 1; + } } if (!goalReached) throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?"); } - return readBases; + return new Pair(readBases, fallsInsideDeletion); } public static SAMRecord unclipSoftClippedBases(SAMRecord rec) { @@ -871,4 +946,19 @@ public class ReadUtils { return rec; } + + /** + * Compares two SAMRecords only the basis on alignment start. Note that + * comparisons are performed ONLY on the basis of alignment start; any + * two SAM records with the same alignment start will be considered equal. + * + * Unmapped alignments will all be considered equal. + */ + + @Requires({"read1 != null", "read2 != null"}) + @Ensures("result == 0 || result == 1 || result == -1") + public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) { + AlignmentStartComparator comp = new AlignmentStartComparator(); + return comp.compare(read1, read2); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java index 9d4b23a8b..c146bf4d4 100644 --- a/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/text/ListFileUtils.java @@ -80,7 +80,7 @@ public class ListFileUtils { unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags)); } else { - throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " + + throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM files with the .bam extension and lists of BAM files " + "with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " + "of BAM files is in the correct format, update the extension, and try again.",inputFileName)); } diff --git a/public/packages/Queue.xml b/public/packages/Queue.xml index 589cb45f5..c256e5687 100644 --- a/public/packages/Queue.xml +++ b/public/packages/Queue.xml @@ -59,7 +59,12 @@ - + + + + + + diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala new file mode 100644 index 000000000..62f66ec06 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/snpeff/SnpEff.scala @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.extensions.snpeff + +import org.broadinstitute.sting.queue.function.JavaCommandLineFunction +import java.io.File +import org.broadinstitute.sting.commandline.{Argument, Output, Input} + +/** + * Basic snpEff support. + * See: http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator + */ +class SnpEff extends JavaCommandLineFunction { + javaMainClass = "ca.mcgill.mcb.pcingola.snpEffect.commandLine.SnpEff" + + @Input(doc="snp vcf4 file") + var inVcf: File = _ + + @Input(doc="config file with path to data dir", required=false) + var config: File = _ + + @Argument(doc="genome version") + var genomeVersion: String = _ + + @Argument(doc="verbose", required=false) + var verbose = true + + @Output(doc="snp eff output") + var outVcf: File = _ + + override def commandLine = Array( + super.commandLine, + " eff", + if (verbose) " -v" else "", + optional(" -c ", config), + " -i vcf -o vcf %s %s > %s".format(genomeVersion, inVcf, outVcf) + ).mkString +} diff --git a/settings/repository/net.sf.snpeff/snpeff-2.0.2.jar b/settings/repository/net.sf.snpeff/snpeff-2.0.2.jar new file mode 100755 index 000000000..bfd06f97f Binary files /dev/null and b/settings/repository/net.sf.snpeff/snpeff-2.0.2.jar differ diff --git a/settings/repository/net.sf.snpeff/snpeff-2.0.2.xml b/settings/repository/net.sf.snpeff/snpeff-2.0.2.xml new file mode 100644 index 000000000..f0568def4 --- /dev/null +++ b/settings/repository/net.sf.snpeff/snpeff-2.0.2.xml @@ -0,0 +1,3 @@ + + +