Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
a88b7c1203
5
ivy.xml
5
ivy.xml
|
|
@ -46,7 +46,7 @@
|
|||
<!-- Lucene core utilities -->
|
||||
<dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/>
|
||||
|
||||
<!-- Dependencies for LSF library -->
|
||||
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
|
||||
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
|
||||
|
||||
<!-- Dependencies for amazon.com S3 support -->
|
||||
|
|
@ -75,6 +75,9 @@
|
|||
<dependency org="org.apache.poi" name="poi" rev="3.8-beta3" />
|
||||
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta3" />
|
||||
|
||||
<!-- snpEff annotator for pipelines -->
|
||||
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.2" />
|
||||
|
||||
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
|
||||
<exclude org="javax.servlet" />
|
||||
<exclude org="javax.jms" />
|
||||
|
|
|
|||
|
|
@ -354,7 +354,7 @@ public class VariantEvalUtils {
|
|||
|
||||
private void addMapping(HashMap<String, Set<VariantContext>> mappings, String sample, VariantContext vc) {
|
||||
if ( !mappings.containsKey(sample) )
|
||||
mappings.put(sample, new HashSet<VariantContext>());
|
||||
mappings.put(sample, new LinkedHashSet<VariantContext>());
|
||||
mappings.get(sample).add(vc);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ import java.util.*;
|
|||
* -o output.vcf \
|
||||
* -sn SAMPLE_1_PARC \
|
||||
* -sn SAMPLE_1_ACTG \
|
||||
* -sn 'SAMPLE.+PARC'
|
||||
* -se 'SAMPLE.+PARC'
|
||||
*
|
||||
* Select any sample that matches a regular expression and sites where the QD annotation is more than 10:
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
|
|
@ -100,7 +100,7 @@ import java.util.*;
|
|||
* -T SelectVariants \
|
||||
* --variant input.vcf \
|
||||
* -o output.vcf \
|
||||
* -sn 'SAMPLE.+PARC'
|
||||
* -se 'SAMPLE.+PARC'
|
||||
* -select "QD > 10.0"
|
||||
*
|
||||
* Select a sample and exclude non-variant loci and filtered loci:
|
||||
|
|
|
|||
|
|
@ -35,8 +35,10 @@ import org.reflections.scanners.SubTypesScanner;
|
|||
import org.reflections.util.ConfigurationBuilder;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Method;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.net.URLClassLoader;
|
||||
import java.util.*;
|
||||
|
|
@ -57,8 +59,25 @@ public class PluginManager<PluginType> {
|
|||
Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Reflections.class);
|
||||
logger.setLevel(Level.OFF);
|
||||
|
||||
Set<URL> classPathUrls = new LinkedHashSet<URL>();
|
||||
|
||||
URL cwd;
|
||||
try {
|
||||
cwd = new File(".").getAbsoluteFile().toURI().toURL();
|
||||
} catch (MalformedURLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
// NOTE: Reflections also scans directories for classes.
|
||||
// Meanwhile some of the jar MANIFEST.MF Bundle-ClassPath properties contain "."
|
||||
// Do NOT let reflections scan the CWD where it often picks up test classes when
|
||||
// they weren't explicitly in the classpath, for example the UninstantiableWalker
|
||||
for (URL url: JVMUtils.getClasspathURLs())
|
||||
if (!url.equals(cwd))
|
||||
classPathUrls.add(url);
|
||||
|
||||
defaultReflections = new Reflections( new ConfigurationBuilder()
|
||||
.setUrls(JVMUtils.getClasspathURLs())
|
||||
.setUrls(classPathUrls)
|
||||
.setScanners(new SubTypesScanner()));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -249,7 +249,7 @@ public class ClippingOp {
|
|||
|
||||
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
|
||||
private SAMRecord hardClip (SAMRecord read, int start, int stop) {
|
||||
if (start == 0 && stop == read.getReadLength() -1)
|
||||
if (start == 0 && stop == read.getReadLength() - 1)
|
||||
return new SAMRecord(read.getHeader());
|
||||
|
||||
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
|
|||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
|
|
@ -68,25 +69,15 @@ public class ReadClipper {
|
|||
}
|
||||
|
||||
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
|
||||
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
|
||||
|
||||
if (start < 0 || stop > read.getReadLength() - 1)
|
||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||
|
||||
if ( start > stop ) {
|
||||
// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
||||
if ( start > stop )
|
||||
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
|
||||
}
|
||||
|
||||
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
|
||||
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
|
||||
// stop -= numDeletions(read);
|
||||
// if ( start > stop )
|
||||
// start -= numDeletions(read);
|
||||
|
||||
|
||||
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
this.ops = null;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
public class AlignmentStartWithNoTiesComparator implements Comparator<SAMRecord> {
|
||||
@Requires("c1 >= 0 && c2 >= 0")
|
||||
@Ensures("result == 0 || result == 1 || result == -1")
|
||||
private int compareContigs(int c1, int c2) {
|
||||
if (c1 == c2)
|
||||
return 0;
|
||||
else if (c1 > c2)
|
||||
return 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Requires("r1 != null && r2 != null")
|
||||
@Ensures("result == 0 || result == 1 || result == -1")
|
||||
public int compare(SAMRecord r1, SAMRecord r2) {
|
||||
int result;
|
||||
|
||||
if (r1 == r2)
|
||||
result = 0;
|
||||
|
||||
else if (r1.getReadUnmappedFlag())
|
||||
result = 1;
|
||||
else if (r2.getReadUnmappedFlag())
|
||||
result = -1;
|
||||
else {
|
||||
final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
|
||||
|
||||
if (cmpContig != 0)
|
||||
result = cmpContig;
|
||||
|
||||
else {
|
||||
if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1;
|
||||
else result = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
@ -780,11 +780,56 @@ public class ReadUtils {
|
|||
}
|
||||
|
||||
|
||||
public enum ClippingTail {
|
||||
LEFT_TAIL,
|
||||
RIGHT_TAIL
|
||||
}
|
||||
|
||||
/**
|
||||
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in
|
||||
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
|
||||
* the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
|
||||
* deletion.
|
||||
*
|
||||
* @param read
|
||||
* @param refCoord
|
||||
* @param tail
|
||||
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
|
||||
*/
|
||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||
@Ensures({"result >= 0", "result < read.getReadLength()"})
|
||||
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) {
|
||||
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
|
||||
int readCoord = result.getFirst();
|
||||
|
||||
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
|
||||
readCoord++;
|
||||
|
||||
return readCoord;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the read coordinate corresponding to the requested reference coordinate.
|
||||
*
|
||||
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
|
||||
* will return the last read base before the deletion. This function returns a
|
||||
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
|
||||
* a deletion.
|
||||
*
|
||||
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a
|
||||
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
|
||||
* behavior to your needs.
|
||||
*
|
||||
* @param read
|
||||
* @param refCoord
|
||||
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
|
||||
*/
|
||||
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
|
||||
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
|
||||
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
|
||||
int readBases = 0;
|
||||
int refBases = 0;
|
||||
boolean fallsInsideDeletion = false;
|
||||
|
||||
if (refCoord < read.getAlignmentStart()) {
|
||||
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
|
||||
|
|
@ -806,26 +851,56 @@ public class ReadUtils {
|
|||
int shift = 0;
|
||||
|
||||
if (cigarElement.getOperator().consumesReferenceBases()) {
|
||||
if (refBases + cigarElement.getLength() < goal) {
|
||||
if (refBases + cigarElement.getLength() < goal)
|
||||
shift = cigarElement.getLength();
|
||||
}
|
||||
else {
|
||||
else
|
||||
shift = goal - refBases;
|
||||
}
|
||||
|
||||
refBases += shift;
|
||||
}
|
||||
goalReached = refBases == goal;
|
||||
|
||||
if (cigarElement.getOperator().consumesReadBases()) {
|
||||
readBases += goalReached ? shift : cigarElement.getLength();
|
||||
}
|
||||
if (!goalReached && cigarElement.getOperator().consumesReadBases())
|
||||
readBases += cigarElement.getLength();
|
||||
|
||||
if (goalReached) {
|
||||
// Is this base's reference position within this cigar element? Or did we use it all?
|
||||
boolean endsWithinCigar = shift < cigarElement.getLength();
|
||||
|
||||
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
|
||||
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
|
||||
if (!endsWithinCigar && !cigarElementIterator.hasNext())
|
||||
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
|
||||
|
||||
CigarElement nextCigarElement;
|
||||
|
||||
// if we end inside the current cigar element, we just have to check if it is a deletion
|
||||
if (endsWithinCigar)
|
||||
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
|
||||
|
||||
// if we end outside the current cigar element, we need to check if the next element is a deletion.
|
||||
else {
|
||||
nextCigarElement = cigarElementIterator.next();
|
||||
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
|
||||
}
|
||||
|
||||
// If we reached our goal outside a deletion, add the shift
|
||||
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
|
||||
readBases += shift;
|
||||
|
||||
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
|
||||
// to add the shift of the current cigar element but go back to it's last element to return the last
|
||||
// base before the deletion (see warning in function contracts)
|
||||
else if (fallsInsideDeletion && !endsWithinCigar)
|
||||
readBases += shift - 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (!goalReached)
|
||||
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
|
||||
}
|
||||
|
||||
return readBases;
|
||||
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
|
||||
}
|
||||
|
||||
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
|
||||
|
|
@ -871,4 +946,19 @@ public class ReadUtils {
|
|||
|
||||
return rec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two SAMRecords only the basis on alignment start. Note that
|
||||
* comparisons are performed ONLY on the basis of alignment start; any
|
||||
* two SAM records with the same alignment start will be considered equal.
|
||||
*
|
||||
* Unmapped alignments will all be considered equal.
|
||||
*/
|
||||
|
||||
@Requires({"read1 != null", "read2 != null"})
|
||||
@Ensures("result == 0 || result == 1 || result == -1")
|
||||
public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) {
|
||||
AlignmentStartComparator comp = new AlignmentStartComparator();
|
||||
return comp.compare(read1, read2);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ public class ListFileUtils {
|
|||
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
|
||||
}
|
||||
else {
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
|
||||
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM files with the .bam extension and lists of BAM files " +
|
||||
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
|
||||
"of BAM files is in the correct format, update the extension, and try again.",inputFileName));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,7 +59,12 @@
|
|||
<dir name="com/sun/jna" />
|
||||
|
||||
<!-- Contracts for Java -->
|
||||
<package name="class com.google.java.contract.**" />
|
||||
<package name="com.google.java.contract.**" />
|
||||
|
||||
<!-- snpEff -->
|
||||
<package name="ca.mcgill.mcb.pcingola.**" />
|
||||
<file path="snpEff_genes.ftl" />
|
||||
<file path="snpEff_summary.ftl" />
|
||||
</dependencies>
|
||||
<modules>
|
||||
<module file="GATKEngine.xml"/>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,60 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.queue.extensions.snpeff
|
||||
|
||||
import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
|
||||
import java.io.File
|
||||
import org.broadinstitute.sting.commandline.{Argument, Output, Input}
|
||||
|
||||
/**
|
||||
* Basic snpEff support.
|
||||
* See: http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator
|
||||
*/
|
||||
class SnpEff extends JavaCommandLineFunction {
|
||||
javaMainClass = "ca.mcgill.mcb.pcingola.snpEffect.commandLine.SnpEff"
|
||||
|
||||
@Input(doc="snp vcf4 file")
|
||||
var inVcf: File = _
|
||||
|
||||
@Input(doc="config file with path to data dir", required=false)
|
||||
var config: File = _
|
||||
|
||||
@Argument(doc="genome version")
|
||||
var genomeVersion: String = _
|
||||
|
||||
@Argument(doc="verbose", required=false)
|
||||
var verbose = true
|
||||
|
||||
@Output(doc="snp eff output")
|
||||
var outVcf: File = _
|
||||
|
||||
override def commandLine = Array(
|
||||
super.commandLine,
|
||||
" eff",
|
||||
if (verbose) " -v" else "",
|
||||
optional(" -c ", config),
|
||||
" -i vcf -o vcf %s %s > %s".format(genomeVersion, inVcf, outVcf)
|
||||
).mkString
|
||||
}
|
||||
Binary file not shown.
|
|
@ -0,0 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="net.sf.snpeff" module="snpeff" revision="2.0.2" status="release" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue