Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2011-09-28 15:53:11 -04:00
commit fe23e4d10c
16 changed files with 264 additions and 168 deletions

View File

@ -46,7 +46,7 @@
<!-- Lucene core utilities -->
<dependency org="org.apache.lucene" name="lucene-core" rev="3.0.3"/>
<!-- Dependencies for LSF library -->
<!-- Dependencies for LSF, DRMAA, and other C libraries -->
<dependency org="net.java.dev.jna" name="jna" rev="3.2.7"/>
<!-- Dependencies for amazon.com S3 support -->
@ -75,6 +75,9 @@
<dependency org="org.apache.poi" name="poi" rev="3.8-beta3" />
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta3" />
<!-- snpEff annotator for pipelines -->
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.2" />
<!-- Exclude dependencies on sun libraries where the downloads aren't available but included in the jvm. -->
<exclude org="javax.servlet" />
<exclude org="javax.jms" />

View File

@ -1,130 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.indexer;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileOutputStream;
/**
* a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
* in which an index is located, or if you'd like to pre-index files to save time.
*/
public class RMDIndexer extends CommandLineProgram {
@Argument(shortName="in", fullName="inputFile", doc="The reference meta data file to index", required = true)
File inputFileSource = null;
@Argument(shortName="t", fullName="type", doc="The reference meta data file format (e.g. vcf, bed)", required = true)
String inputFileType = null;
@Input(fullName = "referenceSequence", shortName = "R", doc = "The reference to use when indexing; this sequence will be set in the index", required = true)
public File referenceFile = null;
@Input(shortName = "i", fullName = "indexFile", doc = "Where to write the index to (as a file), if not supplied we write to <inputFile>.idx", required = false)
public File indexFile = null;
@Argument(shortName = "ba", fullName = "balanceApproach", doc="the index balancing approach to take", required=false)
IndexFactory.IndexBalanceApproach approach = IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME;
private static Logger logger = Logger.getLogger(RMDIndexer.class);
private IndexedFastaSequenceFile ref = null;
private GenomeLocParser genomeLocParser = null;
@Override
protected int execute() throws Exception {
// check parameters
// ---------------------------------------------------------------------------------
// check the input parameters
if (referenceFile != null && !referenceFile.canRead())
throw new IllegalArgumentException("We can't read the reference file: "
+ referenceFile + ", check that it exists, and that you have permissions to read it");
// set the index file to the default name if they didn't specify a file
if (indexFile == null && inputFileSource != null)
indexFile = new File(inputFileSource.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
// check that we can create the output file
if (indexFile == null || indexFile.exists())
throw new IllegalArgumentException("We can't write to the index file location: "
+ indexFile + ", the index exists");
logger.info(String.format("attempting to index file: %s", inputFileSource));
logger.info(String.format("using reference: %s", ((referenceFile != null) ? referenceFile.getAbsolutePath() : "(not supplied)")));
logger.info(String.format("using type: %s", inputFileType));
logger.info(String.format("writing to location: %s", indexFile.getAbsolutePath()));
// try to index the file
// ---------------------------------------------------------------------------------
// setup the reference
ref = new CachingIndexedFastaSequenceFile(referenceFile);
genomeLocParser = new GenomeLocParser(ref);
// get a track builder
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
// find the types available to the track builders
FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);
// check that the type is valid
if (descriptor == null)
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());
// create the codec
FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);
// check if it's a reference dependent feature codec
if (codec instanceof ReferenceDependentFeatureCodec)
((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(genomeLocParser);
// get some timing info
long currentTime = System.currentTimeMillis();
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
// add writing of the sequence dictionary, if supplied
builder.validateAndUpdateIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary());
// create the output stream, and write the index
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
index.write(stream);
stream.close();
// report and exit
logger.info("Successfully wrote the index to location: " + indexFile + " in " + ((System.currentTimeMillis() - currentTime)/1000) + " seconds");
return 0; // return successfully
}
/**
* the generic call execute main
* @param argv the arguments from the command line
*/
public static void main(String[] argv) {
try {
RMDIndexer instance = new RMDIndexer();
start(instance, argv);
System.exit(CommandLineProgram.result);
} catch (Exception e) {
exitSystemWithError(e);
}
}
}

View File

@ -42,7 +42,7 @@ import java.util.List;
import java.util.Map;
/**
* List all of the samples in the info field
* List all of the polymorphic samples.
*/
public class SampleList extends InfoFieldAnnotation {

View File

@ -354,7 +354,7 @@ public class VariantEvalUtils {
private void addMapping(HashMap<String, Set<VariantContext>> mappings, String sample, VariantContext vc) {
if ( !mappings.containsKey(sample) )
mappings.put(sample, new HashSet<VariantContext>());
mappings.put(sample, new LinkedHashSet<VariantContext>());
mappings.get(sample).add(vc);
}

View File

@ -92,7 +92,7 @@ import java.util.*;
* -o output.vcf \
* -sn SAMPLE_1_PARC \
* -sn SAMPLE_1_ACTG \
* -sn 'SAMPLE.+PARC'
* -se 'SAMPLE.+PARC'
*
* Select any sample that matches a regular expression and sites where the QD annotation is more than 10:
* java -Xmx2g -jar GenomeAnalysisTK.jar \
@ -100,7 +100,7 @@ import java.util.*;
* -T SelectVariants \
* --variant input.vcf \
* -o output.vcf \
* -sn 'SAMPLE.+PARC'
* -se 'SAMPLE.+PARC'
* -select "QD > 10.0"
*
* Select a sample and exclude non-variant loci and filtered loci:

View File

@ -35,8 +35,10 @@ import org.reflections.scanners.SubTypesScanner;
import org.reflections.util.ConfigurationBuilder;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.*;
@ -57,8 +59,25 @@ public class PluginManager<PluginType> {
Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(Reflections.class);
logger.setLevel(Level.OFF);
Set<URL> classPathUrls = new LinkedHashSet<URL>();
URL cwd;
try {
cwd = new File(".").getAbsoluteFile().toURI().toURL();
} catch (MalformedURLException e) {
throw new RuntimeException(e);
}
// NOTE: Reflections also scans directories for classes.
// Meanwhile some of the jar MANIFEST.MF Bundle-ClassPath properties contain "."
// Do NOT let reflections scan the CWD where it often picks up test classes when
// they weren't explicitly in the classpath, for example the UninstantiableWalker
for (URL url: JVMUtils.getClasspathURLs())
if (!url.equals(cwd))
classPathUrls.add(url);
defaultReflections = new Reflections( new ConfigurationBuilder()
.setUrls(JVMUtils.getClasspathURLs())
.setUrls(classPathUrls)
.setScanners(new SubTypesScanner()));
}

View File

@ -249,7 +249,7 @@ public class ClippingOp {
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
private SAMRecord hardClip (SAMRecord read, int start, int stop) {
if (start == 0 && stop == read.getReadLength() -1)
if (start == 0 && stop == read.getReadLength() - 1)
return new SAMRecord(read.getHeader());
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -68,25 +69,15 @@ public class ReadClipper {
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
if ( start > stop ) {
// stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
if ( start > stop )
throw new ReviewedStingException("START > STOP -- this should never happen -- call Mauricio!");
}
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
// stop -= numDeletions(read);
// if ( start > stop )
// start -= numDeletions(read);
//System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop));
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
this.ops = null;

View File

@ -325,11 +325,14 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
return Arrays.toString((Object[])value);
else
throw new RuntimeException("Unexpected array type in prettyPrintValue. Value was " + value + " type is " + type);
} else if ( RodBinding.class.isAssignableFrom(value.getClass() ) )
} else if ( RodBinding.class.isAssignableFrom(value.getClass() ) ) {
// annoying special case to handle the UnBound() constructor
return "none";
return value.toString();
} else if ( value instanceof String ) {
return value.equals("") ? "\"\"" : value;
} else {
return value.toString();
}
}
/**
@ -432,11 +435,17 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
* Returns a Pair of (main, synonym) names for argument with fullName s1 and
* shortName s2. The main is selected to be the longest of the two, provided
* it doesn't exceed MAX_DISPLAY_NAME, in which case the shorter is taken.
* @param s1
* @param s2
* @return
*
* @param s1 the short argument name without -, or null if not provided
* @param s2 the long argument name without --, or null if not provided
* @return A pair of fully qualified names (with - or --) for the argument. The first
* element is the primary display name while the second (potentially null) is a
* synonymous name.
*/
Pair<String, String> displayNames(String s1, String s2) {
s1 = s1 == null ? null : "-" + s1;
s2 = s2 == null ? null : "--" + s2;
if ( s1 == null ) return new Pair<String, String>(s2, null);
if ( s2 == null ) return new Pair<String, String>(s1, null);
@ -510,7 +519,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
*/
protected Map<String, Object> docForArgument(FieldDoc fieldDoc, ArgumentSource source, ArgumentDefinition def) {
Map<String, Object> root = new HashMap<String, Object>();
Pair<String, String> names = displayNames("-" + def.shortName, "--" + def.fullName);
Pair<String, String> names = displayNames(def.shortName, def.fullName);
root.put("name", names.getFirst() );

View File

@ -0,0 +1,46 @@
package org.broadinstitute.sting.utils.sam;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import java.util.Comparator;
public class AlignmentStartWithNoTiesComparator implements Comparator<SAMRecord> {
@Requires("c1 >= 0 && c2 >= 0")
@Ensures("result == 0 || result == 1 || result == -1")
private int compareContigs(int c1, int c2) {
if (c1 == c2)
return 0;
else if (c1 > c2)
return 1;
return -1;
}
@Requires("r1 != null && r2 != null")
@Ensures("result == 0 || result == 1 || result == -1")
public int compare(SAMRecord r1, SAMRecord r2) {
int result;
if (r1 == r2)
result = 0;
else if (r1.getReadUnmappedFlag())
result = 1;
else if (r2.getReadUnmappedFlag())
result = -1;
else {
final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
if (cmpContig != 0)
result = cmpContig;
else {
if (r1.getAlignmentStart() < r2.getAlignmentStart()) result = -1;
else result = 1;
}
}
return result;
}
}

View File

@ -780,11 +780,56 @@ public class ReadUtils {
}
public enum ClippingTail {
LEFT_TAIL,
RIGHT_TAIL
}
/**
* Pre-processes the results of getReadCoordinateForReferenceCoordinate(SAMRecord, int) in case it falls in
* a deletion following the typical clipping needs. If clipping the left tail (beginning of the read) returns
* the base prior to the deletion. If clipping the right tail (end of the read) returns the base after the
* deletion.
*
* @param read
* @param refCoord
* @param tail
* @return the read coordinate corresponding to the requested reference coordinate for clipping.
*/
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result >= 0", "result < read.getReadLength()"})
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
public static int getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord, ClippingTail tail) {
Pair<Integer, Boolean> result = getReadCoordinateForReferenceCoordinate(read, refCoord);
int readCoord = result.getFirst();
if (result.getSecond() && tail == ClippingTail.RIGHT_TAIL)
readCoord++;
return readCoord;
}
/**
* Returns the read coordinate corresponding to the requested reference coordinate.
*
* WARNING: if the requested reference coordinate happens to fall inside a deletion in the read, this function
* will return the last read base before the deletion. This function returns a
* Pair(int readCoord, boolean fallsInsideDeletion) so you can choose which readCoordinate to use when faced with
* a deletion.
*
* SUGGESTION: Use getReadCoordinateForReferenceCoordinate(SAMRecord, int, ClippingTail) instead to get a
* pre-processed result according to normal clipping needs. Or you can use this function and tailor the
* behavior to your needs.
*
* @param read
* @param refCoord
* @return the read coordinate corresponding to the requested reference coordinate. (see warning!)
*/
@Requires({"refCoord >= read.getUnclippedStart()", "refCoord <= read.getUnclippedEnd()"})
@Ensures({"result.getFirst() >= 0", "result.getFirst() < read.getReadLength()"})
public static Pair<Integer, Boolean> getReadCoordinateForReferenceCoordinate(SAMRecord read, int refCoord) {
int readBases = 0;
int refBases = 0;
boolean fallsInsideDeletion = false;
if (refCoord < read.getAlignmentStart()) {
readBases = getReadCoordinateForReferenceCoordinateBeforeAlignmentStart(read, refCoord);
@ -806,26 +851,56 @@ public class ReadUtils {
int shift = 0;
if (cigarElement.getOperator().consumesReferenceBases()) {
if (refBases + cigarElement.getLength() < goal) {
if (refBases + cigarElement.getLength() < goal)
shift = cigarElement.getLength();
}
else {
else
shift = goal - refBases;
}
refBases += shift;
}
goalReached = refBases == goal;
if (cigarElement.getOperator().consumesReadBases()) {
readBases += goalReached ? shift : cigarElement.getLength();
}
if (!goalReached && cigarElement.getOperator().consumesReadBases())
readBases += cigarElement.getLength();
if (goalReached) {
// Is this base's reference position within this cigar element? Or did we use it all?
boolean endsWithinCigar = shift < cigarElement.getLength();
// If it isn't, we need to check the next one. There should *ALWAYS* be a next one
// since we checked if the goal coordinate is within the read length, so this is just a sanity check.
if (!endsWithinCigar && !cigarElementIterator.hasNext())
throw new ReviewedStingException("Reference coordinate corresponds to a non-existent base in the read. This should never happen -- call Mauricio");
CigarElement nextCigarElement;
// if we end inside the current cigar element, we just have to check if it is a deletion
if (endsWithinCigar)
fallsInsideDeletion = cigarElement.getOperator() == CigarOperator.DELETION;
// if we end outside the current cigar element, we need to check if the next element is a deletion.
else {
nextCigarElement = cigarElementIterator.next();
fallsInsideDeletion = nextCigarElement.getOperator() == CigarOperator.DELETION;
}
// If we reached our goal outside a deletion, add the shift
if (!fallsInsideDeletion && cigarElement.getOperator().consumesReadBases())
readBases += shift;
// If we reached our goal inside a deletion, but the deletion is the next cigar element then we need
// to add the shift of the current cigar element but go back to it's last element to return the last
// base before the deletion (see warning in function contracts)
else if (fallsInsideDeletion && !endsWithinCigar)
readBases += shift - 1;
}
}
if (!goalReached)
throw new ReviewedStingException("Somehow the requested coordinate is not covered by the read. Too many deletions?");
}
return readBases;
return new Pair<Integer, Boolean>(readBases, fallsInsideDeletion);
}
public static SAMRecord unclipSoftClippedBases(SAMRecord rec) {
@ -871,4 +946,19 @@ public class ReadUtils {
return rec;
}
/**
* Compares two SAMRecords only the basis on alignment start. Note that
* comparisons are performed ONLY on the basis of alignment start; any
* two SAM records with the same alignment start will be considered equal.
*
* Unmapped alignments will all be considered equal.
*/
@Requires({"read1 != null", "read2 != null"})
@Ensures("result == 0 || result == 1 || result == -1")
public static int compareSAMRecords(SAMRecord read1, SAMRecord read2) {
AlignmentStartComparator comp = new AlignmentStartComparator();
return comp.compare(read1, read2);
}
}

View File

@ -80,7 +80,7 @@ public class ListFileUtils {
unpackedReads.add(new SAMReaderID(inputFileName,inputFileNameTags));
}
else {
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I) supports only BAM files with the .bam extension and lists of BAM files " +
throw new UserException.CommandLineException(String.format("The GATK reads argument (-I, --input_file) supports only BAM files with the .bam extension and lists of BAM files " +
"with the .list extension, but the file %s has neither extension. Please ensure that your BAM file or list " +
"of BAM files is in the correct format, update the extension, and try again.",inputFileName));
}

View File

@ -59,7 +59,12 @@
<dir name="com/sun/jna" />
<!-- Contracts for Java -->
<package name="class com.google.java.contract.**" />
<package name="com.google.java.contract.**" />
<!-- snpEff -->
<package name="ca.mcgill.mcb.pcingola.**" />
<file path="snpEff_genes.ftl" />
<file path="snpEff_summary.ftl" />
</dependencies>
<modules>
<module file="GATKEngine.xml"/>

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.snpeff
import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
import java.io.File
import org.broadinstitute.sting.commandline.{Argument, Output, Input}
/**
* Basic snpEff support.
* See: http://www.broadinstitute.org/gsa/wiki/index.php/Adding_Genomic_Annotations_Using_SnpEff_and_VariantAnnotator
*/
class SnpEff extends JavaCommandLineFunction {
javaMainClass = "ca.mcgill.mcb.pcingola.snpEffect.commandLine.SnpEff"
@Input(doc="snp vcf4 file")
var inVcf: File = _
@Input(doc="config file with path to data dir", required=false)
var config: File = _
@Argument(doc="genome version")
var genomeVersion: String = _
@Argument(doc="verbose", required=false)
var verbose = true
@Output(doc="snp eff output")
var outVcf: File = _
override def commandLine = Array(
super.commandLine,
" eff",
if (verbose) " -v" else "",
optional(" -c ", config),
" -i vcf -o vcf %s %s > %s".format(genomeVersion, inVcf, outVcf)
).mkString
}

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf.snpeff" module="snpeff" revision="2.0.2" status="release" />
</ivy-module>