Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Mark DePristo 2011-09-28 11:36:33 -04:00
commit a88b7c1203
13 changed files with 247 additions and 30 deletions

View File

@ -46,7 +46,7 @@
<!-- Lucene core utilities -->
<dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/>
<!-- Dependencies for LSF library -->
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
<!-- Dependencies for amazon.com S3 support -->
@ -75,6 +75,9 @@
<dependency org="org.apache.poi" name="poi" rev="3.8-beta3" />
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta3" />
<!-- snpEff annotator for pipelines -->
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.2" />
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
<exclude org="javax.servlet" />
<exclude org="javax.jms" />

View File

@ -354,7 +354,7 @@ public class VariantEvalUtils {
private void addMapping(HashMap<String, Set<VariantContext>> mappings, String sample, VariantContext vc) {
if ( !mappings.containsKey(sample) )
mappings.put(sample, new HashSet<VariantContext>());
mappings.put(sample, new LinkedHashSet<VariantContext>());
mappings.get(sample).add(vc);
}

View File

@ -92,7 +92,7 @@ import java.util.*;
* -o output.vcf \
* -sn SAMPLE_1_PARC \
* -sn SAMPLE_1_ACTG \
* -sn 'SAMPLE.+PARC'
* -se 'SAMPLE.+PARC'
*
* Select any sample that matches a regular expression and sites where the QD annotation is more than 10:
* java -Xmx2g -jar GenomeAnalysisTK.jar \
@ -100,7 +100,7 @@ import java.util.*;
* -T SelectVariants \
* --variant input.vcf \
* -o output.vcf \
* -sn 'SAMPLE.+PARC'
* -se 'SAMPLE.+PARC'
* -select "QD > 10.0"
*
* Select a sample and exclude non-variant loci and filtered loci:

View File

@ -35,8 +35,10 @@ import org.reflections.scanners.SubTypesScanner;
import org.reflections.util.ConfigurationBuilder;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.*;
@ -57,8 +59,25 @@ public class PluginManager<PluginType> {
Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Reflections.class);
logger.setLevel(Level.OFF);
Set<URL> classPathUrls = new LinkedHashSet<URL>();
URL cwd;
try {
cwd = new File(".").getAbsoluteFile().toURI().toURL();
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
// NOTE: Reflections also scans directories for classes.
// Meanwhile some of the jar MANIFEST.MF Bundle-ClassPath properties contain "."
// Do NOT let reflections scan the CWD where it often picks up test classes when
// they weren't explicitly in the classpath, for example the UninstantiableWalker
for (URL url: JVMUtils.getClasspathURLs())
if (!url.equals(cwd))
classPathUrls.add(url);
defaultReflections = new Reflections( new ConfigurationBuilder()
.setUrls(JVMUtils.getClasspathURLs())
.setUrls(classPathUrls)
.setScanners(new SubTypesScanner()));
}

View File

@ -249,7 +249,7 @@ public class ClippingOp {
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
private SAMRecord hardClip (SAMRecord read, int start, int stop) {
if (start == 0 && stop == read.getReadLength() -1)
if (start == 0 && stop == read.getReadLength() - 1)
return new SAMRecord(read.getHeader());
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -68,25 +69,15 @@ public class ReadClipper {
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
if ( start > stop ) {
// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
if ( start > stop )
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
}
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
// stop -= numDeletions(read);
// if ( start > stop )
// start -= numDeletions(read);
//System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop));
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.utils.sam;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import java.util.Comparator;
public class AlignmentStartWithNoTiesComparator implements Comparator<SAMRecord> {
@Requires("c1 >= 0 && c2 >= 0")
@Ensures("result == 0 || result == 1 || result == -1")
private int compareContigs(int c1, int c2) {
if (c1 == c2)
return 0;
else if (c1 > c2)
return 1;
return -1;
}
@Requires("r1 != null && r2 != null")
@Ensures("result == 0 || result == 1 || result == -1")
public int compare(SAMRecord r1, SAMRecord r2) {
int result;
if (r1 == r2)
result = 0;
else if (r1.getReadUnmappedFlag())
result = 1;
else if (r2.getReadUnmappedFlag())
result = -1;
else {
final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
if (cmpContig != 0)
result = cmpContig;
else {
if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1;
else result = 1;
}
}
return result;
}
}

View File

@ -780,11 +780,56 @@ public class ReadUtils {
}
public enum ClippingTail {
LEFT_TAIL,
RIGHT_TAIL
}
/**
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
* the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
* deletion.
*
* @param read
* @param refCoord
* @param tail
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
*/
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) {
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
int readCoord = result.getFirst();
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
readCoord++;
return readCoord;
}
/**
* Returns the read coordinate corresponding to the requested reference coordinate.
*
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
* will return the last read base before the deletion. This function returns a
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
* a deletion.
*
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
* behavior to your needs.
*
* @param read
* @param refCoord
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
*/
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
int readBases = 0;
int refBases = 0;
boolean fallsInsideDeletion = false;
if (refCoord < read.getAlignmentStart()) {
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
@ -806,26 +851,56 @@ public class ReadUtils {
int shift = 0;
if (cigarElement.getOperator().consumesReferenceBases()) {
if (refBases + cigarElement.getLength() < goal) {
if (refBases + cigarElement.getLength() < goal)
shift = cigarElement.getLength();
}
else {
else
shift = goal - refBases;
}
refBases += shift;
}
goalReached = refBases == goal;
if (cigarElement.getOperator().consumesReadBases()) {
readBases += goalReached ? shift : cigarElement.getLength();
}
if (!goalReached && cigarElement.getOperator().consumesReadBases())
readBases += cigarElement.getLength();
if (goalReached) {
// Is this base's reference position within this cigar element? Or did we use it all?
boolean endsWithinCigar = shift < cigarElement.getLength();
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
if (!endsWithinCigar && !cigarElementIterator.hasNext())
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
CigarElement nextCigarElement;
// if we end inside the current cigar element, we just have to check if it is a deletion
if (endsWithinCigar)
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
// if we end outside the current cigar element, we need to check if the next element is a deletion.
else {
nextCigarElement = cigarElementIterator.next();
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
}
// If we reached our goal outside a deletion, add the shift
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
readBases += shift;
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
// to add the shift of the current cigar element but go back to it's last element to return the last
// base before the deletion (see warning in function contracts)
else if (fallsInsideDeletion && !endsWithinCigar)
readBases += shift - 1;
}
}
if (!goalReached)
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
}
return readBases;
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
}
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
@ -871,4 +946,19 @@ public class ReadUtils {
return rec;
}
/**
* Compares two SAMRecords only the basis on alignment start. Note that
* comparisons are performed ONLY on the basis of alignment start; any
* two SAM records with the same alignment start will be considered equal.
*
* Unmapped alignments will all be considered equal.
*/
@Requires({"read1 != null", "read2 != null"})
@Ensures("result == 0 || result == 1 || result == -1")
public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) {
AlignmentStartComparator comp = new AlignmentStartComparator();
return comp.compare(read1, read2);
}
}

View File

@ -80,7 +80,7 @@ public class ListFileUtils {
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
}
else {
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM files with the .bam extension and lists of BAM files " +
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
"of BAM files is in the correct format, update the extension, and try again.",inputFileName));
}

View File

@ -59,7 +59,12 @@
<dir name="com/sun/jna" />
<!-- Contracts for Java -->
<package name="class com.google.java.contract.**" />
<package name="com.google.java.contract.**" />
<!-- snpEff -->
<package name="ca.mcgill.mcb.pcingola.**" />
<file path="snpEff_genes.ftl" />
<file path="snpEff_summary.ftl" />
</dependencies>
<modules>
<module file="GATKEngine.xml"/>

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.snpeff
import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
import java.io.File
import org.broadinstitute.sting.commandline.{Argument, Output, Input}
/**
* Basic snpEff support.
* See: http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator
*/
class SnpEff extends JavaCommandLineFunction {
javaMainClass = "ca.mcgill.mcb.pcingola.snpEffect.commandLine.SnpEff"
@Input(doc="snp vcf4 file")
var inVcf: File = _
@Input(doc="config file with path to data dir", required=false)
var config: File = _
@Argument(doc="genome version")
var genomeVersion: String = _
@Argument(doc="verbose", required=false)
var verbose = true
@Output(doc="snp eff output")
var outVcf: File = _
override def commandLine = Array(
super.commandLine,
" eff",
if (verbose) " -v" else "",
optional(" -c ", config),
" -i vcf -o vcf %s %s > %s".format(genomeVersion, inVcf, outVcf)
).mkString
}

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf.snpeff" module="snpeff" revision="2.0.2" status="release" />
</ivy-module>