diff --git a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
index 16358d05f..5fff8f609 100644
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
@@ -379,7 +379,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
if ( tribbleType == null )
- if ( ! file.canRead() | !! file.isFile() ) {
+ if ( ! file.canRead() | ! file.isFile() ) {
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
} else {
throw new UserException.CommandLineException(
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
index 467aebac5..47eb55b28 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
@@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.GATKBAMFileSpan;
+import net.sf.samtools.GATKChunk;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@@ -84,7 +85,7 @@ public class BAMScheduler implements Iterator
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
+ *
* - no suffix: per locus coverage
+ *
* - _summary: total, mean, median, quartiles, and threshold proportions, aggregated over all bases
+ *
* - _statistics: coverage histograms (# locus with X coverage), aggregated over all bases
+ *
* - _interval_summary: total, mean, median, quartiles, and threshold proportions, aggregated per interval
+ *
* - _interval_statistics: 2x2 table of # of intervals covered to >= X depth in >=Y samples
+ *
* - _gene_summary: total, mean, median, quartiles, and threshold proportions, aggregated per gene
+ *
* - _gene_statistics: 2x2 table of # of genes covered to >= X depth in >= Y samples
+ *
* - _cumulative_coverage_counts: coverage histograms (# locus with >= X coverage), aggregated over all bases
+ *
* - _cumulative_coverage_proportions: proprotions of loci with >= X coverage, aggregated over all bases
*
- * Given variant ROD tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
- * Additionally, allows for a "snpmask" ROD to set overlapping bases to 'N'.
+ * Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
+ * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
+ * Note that if there are multiple variants at a site, it takes the first one seen.
+ * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
*
*
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
index 5f3b37cc8..7ae5c5c75 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
@@ -42,6 +42,9 @@ import java.io.PrintStream;
*
*
* The output format can be partially controlled using the provided command-line arguments.
+ * Specify intervals with the usual -L argument to output only the reference bases within your intervals.
+ * Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
+ * separate fasta sequence (named numerically in order).
*
*
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
index 500b11360..d88e55687 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
@@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index d5dbdedd6..428f97e2a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -38,7 +38,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
-import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@@ -127,7 +126,8 @@ public class UnifiedGenotyper extends LocusWalker
+ * Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
+ *
+ * Indel calls with associated metrics.
+ *
* A recalibration table file in CSV format that is used by the TableRecalibration walker.
+ * It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score.
+ *
+ * The first 20 lines of such a file is shown below.
+ * * The file begins with a series of comment lines describing:
+ * ** The number of counted loci
+ * ** The number of counted bases
+ * ** The number of skipped loci and the fraction skipped, due to presence in dbSNP or bad reference bases
+ *
+ * * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.
+ *
+ * * After the header, data records occur one per line until the end of the file. The first several items on a line are the values of the individual covariates and will change
+ * depending on which covariates were specified at runtime. The last three items are the data- that is, number of observations for this combination of covariates, number of
+ * reference mismatches, and the raw empirical quality score calculated by phred-scaling the mismatch rate.
+ *
+ * Output
* Input
* Input
* Input
+ * Output
+ * Examples
+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ * -R ref.fasta \
+ * -T SomaticIndelDetector \
+ * -o indels.vcf \
+ * -verbose indels.txt
+ * -I:normal normal.bam \
+ * -I:tumor tumor.bam
+ *
*
*/
+
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
public class SomaticIndelDetectorWalker extends ReadWalkerOutput
*
+ * # Counted Sites 19451059
+ * # Counted Bases 56582018
+ * # Skipped Sites 82666
+ * # Fraction Skipped 1 / 235 bp
+ * ReadGroup,QualityScore,Cycle,Dinuc,nObservations,nMismatches,Qempirical
+ * SRR006446,11,65,CA,9,1,10
+ * SRR006446,11,48,TA,10,0,40
+ * SRR006446,11,67,AA,27,0,40
+ * SRR006446,11,61,GA,11,1,10
+ * SRR006446,12,34,CA,47,1,17
+ * SRR006446,12,30,GA,52,1,17
+ * SRR006446,12,36,AA,352,1,25
+ * SRR006446,12,17,TA,182,11,12
+ * SRR006446,11,48,TG,2,0,40
+ * SRR006446,11,67,AG,1,0,40
+ * SRR006446,12,34,CG,9,0,40
+ * SRR006446,12,30,GG,43,0,40
+ * ERR001876,4,31,AG,1,0,40
+ * ERR001876,4,31,AT,2,2,1
+ * ERR001876,4,31,CA,1,0,40
+ *
* Examples
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
index 01e8cd321..48cba6a1a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
@@ -61,7 +61,7 @@ import java.util.List;
* CACGTTCGGcttgtgcagagcctcaaggtcatccagaggtgatAGTTTAGGGCCCTCTCAAGTCTTTCCNGTGCGCATGG[GT/AC*]CAGCCCTGGGCACCTGTNNNNNNNNNNNNNTGCTCATGGCCTTCTAGATTCCCAGGAAATGTCAGAGCTTTTCAAAGCCC
*
* are amplicon sequences resulting from running the tool. The flags (preceding the sequence itself) can be:
- *
+ *
* Valid // amplicon is valid
* SITE_IS_FILTERED=1 // validation site is not marked 'PASS' or '.' in its filter field ("you are trying to validate a filtered variant")
* VARIANT_TOO_NEAR_PROBE=1 // there is a variant too near to the variant to be validated, potentially shifting the mass-spec peak
@@ -72,10 +72,10 @@ import java.util.List;
* END_TOO_CLOSE, // variant is too close to the end of the amplicon region to give sequenom a good chance to find a suitable primer
* NO_VARIANTS_FOUND, // no variants found within the amplicon region
* INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. insertion directly preceding or postceding, or a deletion that spans the site itself)
- *
* java * -jar GenomeAnalysisTK.jar * -T ValidationAmplicons diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 266b97af0..28f4f2a56 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -56,6 +56,22 @@ import java.util.*; *Output
** Evaluation tables detailing the results of the eval modules which were applied. + * For example: + *
+ * output.eval.gatkreport: + * ##:GATKReport.v0.1 CountVariants : Counts different classes of variants in the sample + * CountVariants CompRod CpG EvalRod JexlExpression Novelty nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate ... + * CountVariants dbsnp CpG eval none all 65900028 135770 0 135770 0.00206024 ... + * CountVariants dbsnp CpG eval none known 65900028 47068 0 47068 0.00071423 ... + * CountVariants dbsnp CpG eval none novel 65900028 88702 0 88702 0.00134601 ... + * CountVariants dbsnp all eval none all 65900028 330818 0 330818 0.00502000 ... + * CountVariants dbsnp all eval none known 65900028 120685 0 120685 0.00183133 ... + * CountVariants dbsnp all eval none novel 65900028 210133 0 210133 0.00318866 ... + * CountVariants dbsnp non_CpG eval none all 65900028 195048 0 195048 0.00295976 ... + * CountVariants dbsnp non_CpG eval none known 65900028 73617 0 73617 0.00111710 ... + * CountVariants dbsnp non_CpG eval none novel 65900028 121431 0 121431 0.00184265 ... + * ... + ** * *Examples
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 193a65591..88ffcaaeb 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.ArrayList; @@ -11,12 +12,19 @@ import java.util.List; * Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation. */ public class FunctionalClass extends VariantStratifier { + + public enum FunctionalType { + silent, + missense, + nonsense + } + + @Override public void initialize() { states.add("all"); - states.add("silent"); - states.add("missense"); - states.add("nonsense"); + for ( FunctionalType type : FunctionalType.values() ) + states.add(type.name()); } @@ -26,10 +34,12 @@ public class FunctionalClass extends VariantStratifier { relevantStates.add("all"); if (eval != null && eval.isVariant()) { - String type = null; + FunctionalType type = null; if (eval.hasAttribute("refseq.functionalClass")) { - type = eval.getAttributeAsString("refseq.functionalClass"); + try { + type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass")); + } catch ( Exception e ) {} // don't error out if the type isn't supported } else if (eval.hasAttribute("refseq.functionalClass_1")) { int annotationId = 1; String key; @@ -37,24 +47,33 @@ public class FunctionalClass extends VariantStratifier { do { key = String.format("refseq.functionalClass_%d", annotationId); - String newtype = eval.getAttributeAsString(key); - - if ( newtype != null && !newtype.equalsIgnoreCase("null") && - ( type == null || - ( type.equals("silent") && !newtype.equals("silent") ) || - ( type.equals("missense") && newtype.equals("nonsense") ) ) - ) { - type = newtype; + String newtypeStr = eval.getAttributeAsString(key); + if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) { + try { + FunctionalType newType = FunctionalType.valueOf(newtypeStr); + if ( type == null || + ( type == FunctionalType.silent && newType != FunctionalType.silent ) || + ( type == FunctionalType.missense && newType == FunctionalType.nonsense ) ) { + type = newType; + } + } catch ( Exception e ) {} // don't error out if the type isn't supported } annotationId++; } while (eval.hasAttribute(key)); + + } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName() ) ) { + SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName()).toString()); + if ( snpEffType == SnpEff.EffectType.STOP_GAINED ) + type = FunctionalType.nonsense; + else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING ) + type = FunctionalType.missense; + else if ( snpEffType == SnpEff.EffectType.SYNONYMOUS_CODING ) + type = FunctionalType.silent; } - if (type != null) { - if (type.equals("silent")) { relevantStates.add("silent"); } - else if (type.equals("missense")) { relevantStates.add("missense"); } - else if (type.equals("nonsense")) { relevantStates.add("nonsense"); } + if ( type != null ) { + relevantStates.add(type.name()); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 7062f17e5..3e3b29a7f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -234,16 +234,47 @@ public class CombineVariants extends RodWalker{ if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN)) return 0; - List mergedVCs = new ArrayList (); + List preMergedVCs = new ArrayList (); Map > VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); // iterate over the types so that it's deterministic for ( VariantContext.Type type : VariantContext.Type.values() ) { if ( VCsByType.containsKey(type) ) - mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), + preMergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); } + List mergedVCs = new ArrayList (); + // se have records merged but separated by type. If a particular record is for example a snp but all alleles are a subset of an existing mixed record, + // we will still merge those records. + if (preMergedVCs.size() > 1) { + for (VariantContext vc1 : preMergedVCs) { + VariantContext newvc = vc1; + boolean merged = false; + for (int k=0; k < mergedVCs.size(); k++) { + VariantContext vc2 = mergedVCs.get(k); + + if (VariantContextUtils.allelesAreSubset(vc1,vc2) || VariantContextUtils.allelesAreSubset(vc2,vc1)) { + // all alleles of vc1 are contained in vc2 but they are of different type (say, vc1 is snp, vc2 is complex): try to merget v1 into v2 + List vcpair = new ArrayList (); + vcpair.add(vc1); + vcpair.add(vc2); + newvc = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcpair, + priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, + SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC); + mergedVCs.set(k,newvc); + merged = true; + break; + } + } + if (!merged) + mergedVCs.add(vc1); + } + } + else { + mergedVCs = preMergedVCs; + } + for ( VariantContext mergedVC : mergedVCs ) { // only operate at the start of events if ( mergedVC == null ) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java index b98646270..8eaf976d0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java @@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; import java.util.*; /** - * Annotates a validation (from e.g. Sequenom) VCF with QC metrics (HW-equilibrium, % failed probes) + * Annotates a validation (from Sequenom for example) VCF with QC metrics (HW-equilibrium, % failed probes) * * * The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes). @@ -57,7 +57,16 @@ import java.util.*; * *
Output
*- * An annotated VCF. + * An annotated VCF. Additionally, a table like the following will be output: + *
+ * Total number of samples assayed: 185 + * Total number of records processed: 152 + * Number of Hardy-Weinberg violations: 34 (22%) + * Number of no-call violations: 12 (7%) + * Number of homozygous variant violations: 0 (0%) + * Number of records passing all filters: 106 (69%) + * Number of passing records that are polymorphic: 98 (92%) + ** * *Examples
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java deleted file mode 100644 index 7f3d9e17d..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java +++ /dev/null @@ -1,282 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.codecs.snpEff; - -import org.broad.tribble.Feature; -import org.broad.tribble.FeatureCodec; -import org.broad.tribble.TribbleException; -import org.broad.tribble.readers.AsciiLineReader; -import org.broad.tribble.readers.LineReader; -import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec; - -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; - -/** - * Codec for decoding the output format of the SnpEff variant effect predictor tool - * - *- * This format has 23 tab-delimited fields: - * - *
- * Chromosome - * Position - * Reference - * Change - * Change Type: {SNP, MNP, INS, DEL} - * Zygosity: {Hom, Het} - * Quality - * Coverage - * Warnings - * Gene ID - * Gene Name - * Bio Type - * Transcript ID - * Exon ID - * Exon Rank - * Effect - * Old/New Amino Acid - * Old/New Codon - * Codon Num - * CDS Size - * Codons Around - * Amino Acids Around - * Custom Interval ID - *- * Note that we treat all except the Chromosome, Position, and Effect fields as optional. - * - * - *- * See also: @see SNPEff project page - *
- * - * @author David Roazen - * @since 2011 - */ -public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec { - - public static final int EXPECTED_NUMBER_OF_FIELDS = 23; - public static final String FIELD_DELIMITER_PATTERN = "\\t"; - public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]"; - public static final String HEADER_LINE_START = "# "; - public static final String[] HEADER_FIELD_NAMES = { "Chromo", - "Position", - "Reference", - "Change", - "Change type", - "Homozygous", - "Quality", - "Coverage", - "Warnings", - "Gene_ID", - "Gene_name", - "Bio_type", - "Trancript_ID", // yes, this is how it's spelled in the SnpEff output - "Exon_ID", - "Exon_Rank", - "Effect", - "old_AA/new_AA", - "Old_codon/New_codon", - "Codon_Num(CDS)", - "CDS_size", - "Codons around", - "AAs around", - "Custom_interval_ID" - }; - - // The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line: - public static final int[] REQUIRED_FIELDS = { 0, 1, 15 }; - - public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE"; - - - public Feature decodeLoc ( String line ) { - return decode(line); - } - - public Feature decode ( String line ) { - String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1); - - if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) { - throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS + - ") number of fields: found " + tokens.length + " fields.", line); - } - - try { - trimAllFields(tokens); - checkForRequiredFields(tokens, line); - - String contig = tokens[0]; - long position = Long.parseLong(tokens[1]); - - String reference = tokens[2].isEmpty() ? null : tokens[2]; - String change = tokens[3].isEmpty() ? null : tokens[3]; - ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]); - Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]); - Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]); - Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]); - String warnings = tokens[8].isEmpty() ? null : tokens[8]; - String geneID = tokens[9].isEmpty() ? null : tokens[9]; - String geneName = tokens[10].isEmpty() ? null : tokens[10]; - String bioType = tokens[11].isEmpty() ? null : tokens[11]; - String transcriptID = tokens[12].isEmpty() ? null : tokens[12]; - String exonID = tokens[13].isEmpty() ? null : tokens[13]; - Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]); - - boolean isNonCodingGene = isNonCodingGene(tokens[15]); - - // Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present, - // otherwise split it into two subfields. We need this limit to prevent the extra effect-related information - // in the final field (when present) from being inappropriately tokenized: - - int effectFieldTokenLimit = isNonCodingGene ? 3 : 2; - String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit); - EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene); - String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene); - - String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16]; - String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17]; - Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]); - Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]); - String codonsAround = tokens[20].isEmpty() ? null : tokens[20]; - String aasAround = tokens[21].isEmpty() ? null : tokens[21]; - String customIntervalID = tokens[22].isEmpty() ? null : tokens[22]; - - return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage, - warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene, - effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize, - codonsAround, aasAround, customIntervalID); - } - catch ( NumberFormatException e ) { - throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line); - } - catch ( IllegalArgumentException e ) { - throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line); - } - } - - private void trimAllFields ( String[] tokens ) { - for ( int i = 0; i < tokens.length; i++ ) { - tokens[i] = tokens[i].trim(); - } - } - - private void checkForRequiredFields ( String[] tokens, String line ) { - for ( int requiredFieldIndex : REQUIRED_FIELDS ) { - if ( tokens[requiredFieldIndex].isEmpty() ) { - throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" + - HEADER_FIELD_NAMES[requiredFieldIndex] + "\"", - line); - } - } - } - - private boolean isNonCodingGene ( String effectField ) { - return effectField.startsWith(NON_CODING_GENE_FLAG); - } - - private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) { - String effectName = ""; - - // If there's a WITHIN_NON_CODING_GENE flag, the effect name will be in the second subfield, - // otherwise it will be in the first subfield: - - if ( effectFieldTokens.length > 1 && isNonCodingGene ) { - effectName = effectFieldTokens[1].trim(); - } - else { - effectName = effectFieldTokens[0].trim(); - } - - return EffectType.valueOf(effectName); - } - - private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) { - - // The extra effect-related information, if present, will always be the last subfield: - - if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) { - return effectFieldTokens[effectFieldTokens.length - 1].trim(); - } - - return null; - } - - public ClassgetFeatureType() { - return SnpEffFeature.class; - } - - public Object readHeader ( LineReader reader ) { - String headerLine = ""; - - try { - headerLine = reader.readLine(); - } - catch ( IOException e ) { - throw new TribbleException("Unable to read header line from input file."); - } - - validateHeaderLine(headerLine); - return headerLine; - } - - private void validateHeaderLine ( String headerLine ) { - if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) { - throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START); - } - - String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN); - - if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) { - throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" + - EXPECTED_NUMBER_OF_FIELDS + ") of columns."); - } - - for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) { - if ( ! HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) { - throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" + - HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" + - headerTokens[columnIndex] + "\""); - } - } - } - - public boolean canDecode ( final File potentialInput ) { - try { - LineReader reader = new AsciiLineReader(new FileInputStream(potentialInput)); - readHeader(reader); - } - catch ( Exception e ) { - return false; - } - - return true; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java deleted file mode 100644 index 270db470f..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.codecs.snpEff; - -/** - * A set of constants associated with the SnpEff codec. - * - * @author David Roazen - */ -public class SnpEffConstants { - - // Possible SnpEff biological effects and their associated impacts: - public enum EffectType { - START_GAINED (EffectImpact.HIGH), - START_LOST (EffectImpact.HIGH), - EXON_DELETED (EffectImpact.HIGH), - FRAME_SHIFT (EffectImpact.HIGH), - STOP_GAINED (EffectImpact.HIGH), - STOP_LOST (EffectImpact.HIGH), - SPLICE_SITE_ACCEPTOR (EffectImpact.HIGH), - SPLICE_SITE_DONOR (EffectImpact.HIGH), - - NON_SYNONYMOUS_CODING (EffectImpact.MODERATE), - UTR_5_DELETED (EffectImpact.MODERATE), - UTR_3_DELETED (EffectImpact.MODERATE), - CODON_INSERTION (EffectImpact.MODERATE), - CODON_CHANGE_PLUS_CODON_INSERTION (EffectImpact.MODERATE), - CODON_DELETION (EffectImpact.MODERATE), - CODON_CHANGE_PLUS_CODON_DELETION (EffectImpact.MODERATE), - - NONE (EffectImpact.LOW), - CHROMOSOME (EffectImpact.LOW), - INTERGENIC (EffectImpact.LOW), - UPSTREAM (EffectImpact.LOW), - UTR_5_PRIME (EffectImpact.LOW), - SYNONYMOUS_START (EffectImpact.LOW), - NON_SYNONYMOUS_START (EffectImpact.LOW), - CDS (EffectImpact.LOW), - GENE (EffectImpact.LOW), - TRANSCRIPT (EffectImpact.LOW), - EXON (EffectImpact.LOW), - SYNONYMOUS_CODING (EffectImpact.LOW), - CODON_CHANGE (EffectImpact.LOW), - SYNONYMOUS_STOP (EffectImpact.LOW), - NON_SYNONYMOUS_STOP (EffectImpact.LOW), - INTRON (EffectImpact.LOW), - UTR_3_PRIME (EffectImpact.LOW), - DOWNSTREAM (EffectImpact.LOW), - INTRON_CONSERVED (EffectImpact.LOW), - INTERGENIC_CONSERVED (EffectImpact.LOW), - CUSTOM (EffectImpact.LOW); - - private final EffectImpact impact; - - EffectType ( EffectImpact impact ) { - this.impact = impact; - } - - public EffectImpact getImpact() { - return impact; - } - } - - public enum EffectImpact { - LOW (1), - MODERATE (2), - HIGH (3); - - private final int severityRating; - - EffectImpact ( int severityRating ) { - this.severityRating = severityRating; - } - - public boolean isHigherImpactThan ( EffectImpact other ) { - return this.severityRating > other.severityRating; - } - } - - // The kinds of variants supported by the SnpEff output format: - public enum ChangeType { - SNP, - MNP, - INS, - DEL - } - - // Possible zygosities of SnpEff variants: - public enum Zygosity { - Hom, - Het - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java deleted file mode 100644 index 2f120b7d2..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java +++ /dev/null @@ -1,423 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.codecs.snpEff; - -import org.broad.tribble.Feature; - -import java.util.NoSuchElementException; - -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact; -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; - -/** - * Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output. - * - * Many fields are optional, and missing values are represented by nulls. You should always call the - * hasX() method before calling the corresponding getX() method. Required fields can never be null - * and do not have a hasX() method. - * - * @author David Roazen - */ -public class SnpEffFeature implements Feature { - - private String contig; // REQUIRED FIELD - private long position; // REQUIRED FIELD - private String reference; - private String change; - private ChangeType changeType; - private Zygosity zygosity; - private Double quality; - private Long coverage; - private String warnings; - private String geneID; - private String geneName; - private String bioType; - private String transcriptID; - private String exonID; - private Integer exonRank; - private boolean isNonCodingGene; // REQUIRED FIELD - private EffectType effect; // REQUIRED FIELD - private String effectExtraInformation; - private String oldAndNewAA; - private String oldAndNewCodon; - private Integer codonNum; - private Integer cdsSize; - private String codonsAround; - private String aasAround; - private String customIntervalID; - - public SnpEffFeature ( String contig, - long position, - String reference, - String change, - ChangeType changeType, - Zygosity zygosity, - Double quality, - Long coverage, - String warnings, - String geneID, - String geneName, - String bioType, - String transcriptID, - String exonID, - Integer exonRank, - boolean isNonCodingGene, - EffectType effect, - String effectExtraInformation, - String oldAndNewAA, - String oldAndNewCodon, - Integer codonNum, - Integer cdsSize, - String codonsAround, - String aasAround, - String customIntervalID ) { - - if ( contig == null || effect == null ) { - throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields"); - } - - this.contig = contig; - this.position = position; - this.reference = reference; - this.change = change; - this.changeType = changeType; - this.zygosity = zygosity; - this.quality = quality; - this.coverage = coverage; - this.warnings = warnings; - this.geneID = geneID; - this.geneName = geneName; - this.bioType = bioType; - this.transcriptID = transcriptID; - this.exonID = exonID; - this.exonRank = exonRank; - this.isNonCodingGene = isNonCodingGene; - this.effect = effect; - this.effectExtraInformation = effectExtraInformation; - this.oldAndNewAA = oldAndNewAA; - this.oldAndNewCodon = oldAndNewCodon; - this.codonNum = codonNum; - this.cdsSize = cdsSize; - this.codonsAround = codonsAround; - this.aasAround = aasAround; - this.customIntervalID = customIntervalID; - } - - public boolean isHigherImpactThan ( SnpEffFeature other ) { - - // If one effect is in a non-coding gene and the other is not, the effect NOT in the - // non-coding gene has higher impact: - - if ( ! isNonCodingGene() && other.isNonCodingGene() ) { - return true; - } - else if ( isNonCodingGene() && ! other.isNonCodingGene() ) { - return false; - } - - // Otherwise, both effects are either in or not in a non-coding gene, so we compare the impacts - // of the effects themselves as defined in the SnpEffConstants class: - - return getEffectImpact().isHigherImpactThan(other.getEffectImpact()); - } - - public String getChr() { - return contig; - } - - public int getStart() { - return (int)position; - } - - public int getEnd() { - return (int)position; - } - - public boolean hasReference() { - return reference != null; - } - - public String getReference() { - if ( reference == null ) throw new NoSuchElementException("This feature has no reference field"); - return reference; - } - - public boolean hasChange() { - return change != null; - } - - public String getChange() { - if ( change == null ) throw new NoSuchElementException("This feature has no change field"); - return change; - } - - public boolean hasChangeType() { - return changeType != null; - } - - public ChangeType getChangeType() { - if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field"); - return changeType; - } - - public boolean hasZygosity() { - return zygosity != null; - } - - public Zygosity getZygosity() { - if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field"); - return zygosity; - } - - public boolean hasQuality() { - return quality != null; - } - - public Double getQuality() { - if ( quality == null ) throw new NoSuchElementException("This feature has no quality field"); - return quality; - } - - public boolean hasCoverage() { - return coverage != null; - } - - public Long getCoverage() { - if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field"); - return coverage; - } - - public boolean hasWarnings() { - return warnings != null; - } - - public String getWarnings() { - if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field"); - return warnings; - } - - public boolean hasGeneID() { - return geneID != null; - } - - public String getGeneID() { - if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field"); - return geneID; - } - - public boolean hasGeneName() { - return geneName != null; - } - - public String getGeneName() { - if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field"); - return geneName; - } - - public boolean hasBioType() { - return bioType != null; - } - - public String getBioType() { - if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field"); - return bioType; - } - - public boolean hasTranscriptID() { - return transcriptID != null; - } - - public String getTranscriptID() { - if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field"); - return transcriptID; - } - - public boolean hasExonID() { - return exonID != null; - } - - public String getExonID() { - if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field"); - return exonID; - } - - public boolean hasExonRank() { - return exonRank != null; - } - - public Integer getExonRank() { - if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field"); - return exonRank; - } - - public boolean isNonCodingGene() { - return isNonCodingGene; - } - - public EffectType getEffect() { - return effect; - } - - public EffectImpact getEffectImpact() { - return effect.getImpact(); - } - - public boolean hasEffectExtraInformation() { - return effectExtraInformation != null; - } - - public String getEffectExtraInformation() { - if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field"); - return effectExtraInformation; - } - - public boolean hasOldAndNewAA() { - return oldAndNewAA != null; - } - - public String getOldAndNewAA() { - if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field"); - return oldAndNewAA; - } - - public boolean hasOldAndNewCodon() { - return oldAndNewCodon != null; - } - - public String getOldAndNewCodon() { - if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field"); - return oldAndNewCodon; - } - - public boolean hasCodonNum() { - return codonNum != null; - } - - public Integer getCodonNum() { - if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field"); - return codonNum; - } - - public boolean hasCdsSize() { - return cdsSize != null; - } - - public Integer getCdsSize() { - if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field"); - return cdsSize; - } - - public boolean hasCodonsAround() { - return codonsAround != null; - } - - public String getCodonsAround() { - if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field"); - return codonsAround; - } - - public boolean hadAasAround() { - return aasAround != null; - } - - public String getAasAround() { - if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field"); - return aasAround; - } - - public boolean hasCustomIntervalID() { - return customIntervalID != null; - } - - public String getCustomIntervalID() { - if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field"); - return customIntervalID; - } - - public boolean equals ( Object o ) { - if ( o == null || ! (o instanceof SnpEffFeature) ) { - return false; - } - - SnpEffFeature other = (SnpEffFeature)o; - - return contig.equals(other.contig) && - position == other.position && - (reference == null ? other.reference == null : reference.equals(other.reference)) && - (change == null ? other.change == null : change.equals(other.change)) && - changeType == other.changeType && - zygosity == other.zygosity && - (quality == null ? other.quality == null : quality.equals(other.quality)) && - (coverage == null ? other.coverage == null : coverage.equals(other.coverage)) && - (warnings == null ? other.warnings == null : warnings.equals(other.warnings)) && - (geneID == null ? other.geneID == null : geneID.equals(other.geneID)) && - (geneName == null ? other.geneName == null : geneName.equals(other.geneName)) && - (bioType == null ? other.bioType == null : bioType.equals(other.bioType)) && - (transcriptID == null ? other.transcriptID == null : transcriptID.equals(other.transcriptID)) && - (exonID == null ? other.exonID == null : exonID.equals(other.exonID)) && - (exonRank == null ? other.exonRank == null : exonRank.equals(other.exonRank)) && - isNonCodingGene == other.isNonCodingGene && - effect == other.effect && - (effectExtraInformation == null ? other.effectExtraInformation == null : effectExtraInformation.equals(other.effectExtraInformation)) && - (oldAndNewAA == null ? other.oldAndNewAA == null : oldAndNewAA.equals(other.oldAndNewAA)) && - (oldAndNewCodon == null ? other.oldAndNewCodon == null : oldAndNewCodon.equals(other.oldAndNewCodon)) && - (codonNum == null ? other.codonNum == null : codonNum.equals(other.codonNum)) && - (cdsSize == null ? other.cdsSize == null : cdsSize.equals(other.cdsSize)) && - (codonsAround == null ? other.codonsAround == null : codonsAround.equals(other.codonsAround)) && - (aasAround == null ? other.aasAround == null : aasAround.equals(other.aasAround)) && - (customIntervalID == null ? other.customIntervalID == null : customIntervalID.equals(other.customIntervalID)); - } - - public String toString() { - return "[Contig: " + contig + - " Position: " + position + - " Reference: " + reference + - " Change: " + change + - " Change Type: " + changeType + - " Zygosity: " + zygosity + - " Quality: " + quality + - " Coverage: " + coverage + - " Warnings: " + warnings + - " Gene ID: " + geneID + - " Gene Name: " + geneName + - " Bio Type: " + bioType + - " Transcript ID: " + transcriptID + - " Exon ID: " + exonID + - " Exon Rank: " + exonRank + - " Non-Coding Gene: " + isNonCodingGene + - " Effect: " + effect + - " Effect Extra Information: " + effectExtraInformation + - " Old/New AA: " + oldAndNewAA + - " Old/New Codon: " + oldAndNewCodon + - " Codon Num: " + codonNum + - " CDS Size: " + cdsSize + - " Codons Around: " + codonsAround + - " AAs Around: " + aasAround + - " Custom Interval ID: " + customIntervalID + - "]"; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java index eb01e5dca..fd1c74993 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java @@ -24,6 +24,7 @@ public class VCFHeader { private final Set mMetaData; private final Map mInfoMetaData = new HashMap (); private final Map mFormatMetaData = new HashMap (); + private final Map mOtherMetaData = new HashMap (); // the list of auxillary tags private final Set mGenotypeSampleNames = new LinkedHashSet (); @@ -110,6 +111,9 @@ public class VCFHeader { VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line; mFormatMetaData.put(formatLine.getName(), formatLine); } + else { + mOtherMetaData.put(line.getKey(), line); + } } } @@ -185,6 +189,14 @@ public class VCFHeader { public VCFFormatHeaderLine getFormatHeaderLine(String key) { return mFormatMetaData.get(key); } + + /** + * @param key the header key name + * @return the meta data line, or null if there is none + */ + public VCFHeaderLine getOtherHeaderLine(String key) { + return mOtherMetaData.get(key); + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 1c65102ae..cfd59b504 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -817,6 +817,28 @@ public class VariantContext implements Feature { // to enable tribble intergrati throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this); } + /** + * @param other VariantContext whose alternate alleles to compare against + * @return true if this VariantContext has the same alternate alleles as other, + * regardless of ordering. Otherwise returns false. + */ + public boolean hasSameAlternateAllelesAs ( VariantContext other ) { + Set thisAlternateAlleles = getAlternateAlleles(); + Set otherAlternateAlleles = other.getAlternateAlleles(); + + if ( thisAlternateAlleles.size() != otherAlternateAlleles.size() ) { + return false; + } + + for ( Allele allele : thisAlternateAlleles ) { + if ( ! otherAlternateAlleles.contains(allele) ) { + return false; + } + } + + return true; + } + // --------------------------------------------------------------------------------------------------------- // // Working with genotypes diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 986d6305c..506bb3b33 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -663,6 +663,18 @@ public class VariantContextUtils { return merged; } + public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) { + // if all alleles of vc1 are a contained in alleles of vc2, return true + if (!vc1.getReference().equals(vc2.getReference())) + return false; + + for (Allele a :vc1.getAlternateAlleles()) { + if (!vc2.getAlternateAlleles().contains(a)) + return false; + } + + return true; + } public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) { // see if we need to trim common reference base from all alleles boolean trimVC; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java new file mode 100644 index 000000000..462abeba1 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.testng.Assert; +import org.testng.annotations.Test; +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.SnpEffEffect; + +public class SnpEffUnitTest { + + @Test + public void testParseWellFormedEffect() { + String effectName = "NON_SYNONYMOUS_CODING"; + String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" }; + + SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata); + Assert.assertTrue( effect.isWellFormed() && effect.isCoding() ); + } + + @Test + public void testParseInvalidEffectNameEffect() { + String effectName = "MADE_UP_EFFECT"; + String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" }; + + SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata); + Assert.assertFalse(effect.isWellFormed()); + } + + @Test + public void testParseInvalidEffectImpactEffect() { + String effectName = "NON_SYNONYMOUS_CODING"; + String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" }; + + SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata); + Assert.assertFalse(effect.isWellFormed()); + } + + @Test + public void testParseWrongNumberOfMetadataFieldsEffect() { + String effectName = "NON_SYNONYMOUS_CODING"; + String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" }; + + SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata); + Assert.assertFalse(effect.isWellFormed()); + } + + @Test + public void testParseSnpEffWarningEffect() { + String effectName = "NON_SYNONYMOUS_CODING"; + String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" }; + + SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata); + Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") ); + } + + @Test + public void testParseSnpEffErrorEffect() { + String effectName = "NON_SYNONYMOUS_CODING"; + String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" }; + + SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata); + Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") ); + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 832079807..08baae7a7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.Test; import java.util.Arrays; @@ -129,12 +130,24 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testSnpEffAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( - "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant " + - validationDataLocation + "1000G.exomes.vcf --snpEffFile " + validationDataLocation + - "snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000", + "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " + + validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + + "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000", 1, - Arrays.asList("03eae1dab19a9358250890594bf53607") + Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9") ); executeTest("Testing SnpEff annotations", spec); } + + @Test + public void testSnpEffAnnotationsUnsupportedVersion() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " + + validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + + "snpEff.AFR.unfiltered.unsupported.version.vcf -L 1:1-1,500,000", + 1, + UserException.class + ); + executeTest("Testing SnpEff annotations (unsupported version)", spec); + } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java index 5f759fdbf..1a01ef8e8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/beagle/BeagleIntegrationTest.java @@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest { "--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " + "--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " + "--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " + - "-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4")); + "-o %s -NO_HEADER", 1, Arrays.asList("b445d280fd8fee1eeb4aacb3f5a54847")); executeTest("test BeagleOutputToVCF", spec); } @@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest { "--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+ "--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+ "--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+ - "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965")); + "-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("51a57ea565176edd96d907906914b0ee")); executeTest("testBeagleChangesSitesToRef",spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index e992684bc..b90e6d0ff 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -6,7 +6,7 @@ import org.testng.annotations.Test; import java.util.Arrays; public class VariantEvalIntegrationTest extends WalkerTest { - private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval"; + private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval"; private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf"; private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf"; private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf"; @@ -14,6 +14,27 @@ public class VariantEvalIntegrationTest extends WalkerTest { private static String cmdRoot = "-T VariantEval" + " -R " + b36KGReference; + @Test + public void testFunctionClassWithSnpeff() { + WalkerTestSpec spec = new WalkerTestSpec( + buildCommandLine( + "-T VariantEval", + "-R " + b37KGReference, + "--dbsnp " + b37dbSNP132, + "--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf", + "-noEV", + "-EV TiTvVariantEvaluator", + "-noST", + "-ST FunctionalClass", + "-BTI eval", + "-o %s" + ), + 1, + Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12") + ); + executeTest("testFunctionClassWithSnpeff", spec); + } + @Test public void testStratifySamplesAndExcludeMonomorphicSites() { WalkerTestSpec spec = new WalkerTestSpec( diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java index 3267173a7..35495d797 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -90,7 +90,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format @Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format - @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a"); } + @Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "312a22aedb088b678bc891f1a1b03c91"); } @Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); } @@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest { " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" + " -genotypeMergeOptions UNIQUIFY -L 1"), 1, - Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e")); + Arrays.asList("35acb0f15f9cd18c653ede4e15e365c9")); executeTest("threeWayWithRefs", spec); } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java deleted file mode 100644 index 6d492565b..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.codecs.snpEff; - -import org.apache.commons.io.input.ReaderInputStream; -import org.broad.tribble.TribbleException; -import org.broad.tribble.readers.AsciiLineReader; -import org.broad.tribble.readers.LineReader; -import org.testng.Assert; -import org.testng.annotations.Test; - -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; -import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; - -import java.io.StringReader; - -public class SnpEffCodecUnitTest { - - @Test - public void testParseWellFormedSnpEffHeaderLine() { - String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" + - "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" + - "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" + - "AAs around\tCustom_interval_ID"; - - SnpEffCodec codec = new SnpEffCodec(); - LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine))); - String headerReturned = (String)codec.readHeader(reader); - - Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine); - } - - @Test(expectedExceptions = TribbleException.InvalidHeader.class) - public void testParseWrongNumberOfFieldsSnpEffHeaderLine() { - String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" + - "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" + - "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" + - "AAs around"; - - SnpEffCodec codec = new SnpEffCodec(); - LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine))); - codec.readHeader(reader); - } - - @Test(expectedExceptions = TribbleException.InvalidHeader.class) - public void testParseMisnamedColumnSnpEffHeaderLine() { - String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" + - "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" + - "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" + - "AAs around\tCustom_interval_ID"; - - SnpEffCodec codec = new SnpEffCodec(); - LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine))); - codec.readHeader(reader); - } - - @Test - public void testParseSimpleEffectSnpEffLine() { - String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + - "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t"; - - SnpEffFeature expectedFeature = new SnpEffFeature("1", - 69428l, - "T", - "G", - ChangeType.SNP, - Zygosity.Hom, - 6049.69, - 61573l, - null, - "ENSG00000177693", - "OR4F5", - "mRNA", - "ENST00000326183", - "exon_1_69055_70108", - 1, - false, - EffectType.NON_SYNONYMOUS_CODING, - null, - "F/C", - "TTT/TGT", - 113, - 918, - null, - null, - null - ); - - SnpEffCodec codec = new SnpEffCodec(); - SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine); - - Assert.assertEquals(feature, expectedFeature); - } - - @Test - public void testParseNonCodingRegionSnpEffLine() { - String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" + - "RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" + - "L/V\tCTA/GTA\t272\t952\t\t\t"; - - SnpEffFeature expectedFeature = new SnpEffFeature("1", - 1337592l, - "G", - "C", - ChangeType.SNP, - Zygosity.Hom, - 1935.52, - 21885l, - null, - "ENSG00000250188", - "RP4-758J18.5", - "mRNA", - "ENST00000514958", - "exon_1_1337454_1338076", - 2, - true, - EffectType.NON_SYNONYMOUS_CODING, - null, - "L/V", - "CTA/GTA", - 272, - 952, - null, - null, - null - ); - - SnpEffCodec codec = new SnpEffCodec(); - SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine); - - Assert.assertEquals(feature, expectedFeature); - } - - @Test - public void testParseExtraEffectInformationSnpEffLine() { - String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" + - "mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t"; - - SnpEffFeature expectedFeature = new SnpEffFeature("1", - 879537l, - "T", - "C", - ChangeType.SNP, - Zygosity.Hom, - 341.58, - 13733l, - null, - "ENSG00000187634", - "SAMD11", - "mRNA", - "ENST00000341065", - null, - null, - false, - EffectType.UTR_3_PRIME, - "4 bases from transcript end", - null, - null, - null, - null, - null, - null, - null - ); - - SnpEffCodec codec = new SnpEffCodec(); - SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine); - - Assert.assertEquals(feature, expectedFeature); - } - - @Test - public void testParseMultiEffectSnpEffLine() { - String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" + - "ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t"; - - SnpEffFeature expectedFeature = new SnpEffFeature("1", - 901901l, - "C", - "T", - ChangeType.SNP, - Zygosity.Hom, - 162.91, - 4646l, - null, - "ENSG00000187583", - "PLEKHN1", - "mRNA", - "ENST00000379410", - "exon_1_901877_901994", - 1, - false, - EffectType.START_GAINED, - "ATG, UTR_5_PRIME: 11 bases from TSS", - null, - null, - null, - null, - null, - null, - null - ); - - SnpEffCodec codec = new SnpEffCodec(); - SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine); - - Assert.assertEquals(feature, expectedFeature); - } - - @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class) - public void testParseWrongNumberOfFieldsSnpEffLine() { - String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + - "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t"; - - SnpEffCodec codec = new SnpEffCodec(); - SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine); - } - - @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class) - public void testParseBlankEffectFieldSnpEffLine() { - String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + - "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t"; - - SnpEffCodec codec = new SnpEffCodec(); - SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine); - } - - @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class) - public void testParseInvalidNumericFieldSnpEffLine() { - String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + - "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";; - - SnpEffCodec codec = new SnpEffCodec(); - SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine); - } -} diff --git a/public/packages/AnalyzeCovariates.xml b/public/packages/AnalyzeCovariates.xml index 7e31934df..a6675a63d 100644 --- a/public/packages/AnalyzeCovariates.xml +++ b/public/packages/AnalyzeCovariates.xml @@ -6,10 +6,7 @@ - - - - + diff --git a/public/packages/FindContaminatingReadGroups.xml b/public/packages/FindContaminatingReadGroups.xml deleted file mode 100644 index 880f64a81..000000000 --- a/public/packages/FindContaminatingReadGroups.xml +++ /dev/null @@ -1,10 +0,0 @@ - - - diff --git a/public/packages/GATKResources.xml b/public/packages/GATKResources.xml deleted file mode 100755 index 87e6e0e50..000000000 --- a/public/packages/GATKResources.xml +++ /dev/null @@ -1,20 +0,0 @@ - -- -- - - -- - diff --git a/public/packages/IndelGenotyper.xml b/public/packages/IndelGenotyper.xml deleted file mode 100644 index c9e3ae0f6..000000000 --- a/public/packages/IndelGenotyper.xml +++ /dev/null @@ -1,11 +0,0 @@ - -- -- - - - - - - - - - - - - - - - diff --git a/public/packages/LocalRealignmentAroundIndels.xml b/public/packages/LocalRealignmentAroundIndels.xml deleted file mode 100644 index 46960e69f..000000000 --- a/public/packages/LocalRealignmentAroundIndels.xml +++ /dev/null @@ -1,12 +0,0 @@ - -- -- - - - -- - diff --git a/public/packages/QualityScoresRecalibration.xml b/public/packages/QualityScoresRecalibration.xml deleted file mode 100644 index 95e8b7c63..000000000 --- a/public/packages/QualityScoresRecalibration.xml +++ /dev/null @@ -1,18 +0,0 @@ - -- -- - - - -- - - diff --git a/public/packages/RMDIndexer.xml b/public/packages/RMDIndexer.xml deleted file mode 100644 index 5d40876de..000000000 --- a/public/packages/RMDIndexer.xml +++ /dev/null @@ -1,13 +0,0 @@ - -- -- - - - -- - - - - - - - - diff --git a/public/packages/UnifiedGenotyper.xml b/public/packages/UnifiedGenotyper.xml deleted file mode 100644 index 67a17640c..000000000 --- a/public/packages/UnifiedGenotyper.xml +++ /dev/null @@ -1,11 +0,0 @@ - -- -- - - -- - - - - diff --git a/public/packages/VariantAnnotator.xml b/public/packages/VariantAnnotator.xml deleted file mode 100644 index 88c0701f0..000000000 --- a/public/packages/VariantAnnotator.xml +++ /dev/null @@ -1,26 +0,0 @@ - -- -- - - - -- - diff --git a/public/packages/VariantEval.xml b/public/packages/VariantEval.xml deleted file mode 100644 index 791066fb7..000000000 --- a/public/packages/VariantEval.xml +++ /dev/null @@ -1,18 +0,0 @@ - -- -- - - - -- - - - - - - - - - - - - - - - - diff --git a/public/packages/VariantFiltration.xml b/public/packages/VariantFiltration.xml deleted file mode 100644 index 48fa0ff37..000000000 --- a/public/packages/VariantFiltration.xml +++ /dev/null @@ -1,13 +0,0 @@ - -- -- - - -- - - - - - - - - - diff --git a/public/packages/VariantRecalibration.xml b/public/packages/VariantRecalibration.xml deleted file mode 100644 index 6fe6b1eff..000000000 --- a/public/packages/VariantRecalibration.xml +++ /dev/null @@ -1,12 +0,0 @@ - -- -- - - - -- - - - diff --git a/public/perl/liftOverVCF.pl b/public/perl/liftOverVCF.pl index 21cb8bb6b..ba4198292 100755 --- a/public/perl/liftOverVCF.pl +++ b/public/perl/liftOverVCF.pl @@ -36,7 +36,7 @@ my $unsorted_vcf = "$tmp_prefix.unsorted.vcf"; # lift over the file print "Lifting over the vcf..."; -my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -B:variant,vcf $in -o $unsorted_vcf -chain $chain -dict $newRef.dict"; +my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -V:variant $in -o $unsorted_vcf -chain $chain -dict $newRef.dict"; if ($recordOriginalLocation) { $cmd .= " -recordOriginalLocation"; } @@ -66,7 +66,7 @@ system($cmd) == 0 or quit("The sorting step failed. Please correct the necessar # Filter the VCF for bad records print "\nFixing/removing bad records...\n"; -$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -B:variant,vcf $sorted_vcf -o $out"; +$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -V:variant $sorted_vcf -o $out"; system($cmd) == 0 or quit("The filtering step failed. Please correct the necessary errors before retrying."); # clean up diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala index 1d473b210..9bddfd97c 100644 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala @@ -56,15 +56,15 @@ class ExampleUnifiedGenotyper extends QScript { genotyper.input_file :+= qscript.bamFile genotyper.out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf") - evalUnfiltered.rodBind :+= RodBind("eval", "VCF", genotyper.out) + evalUnfiltered.eval :+= genotyper.out evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval") - variantFilter.rodBind :+= RodBind("variant", "VCF", genotyper.out) + variantFilter.variant = genotyper.out variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf") variantFilter.filterName = filterNames variantFilter.filterExpression = filterExpressions.map("\"" + _ + "\"") - evalFiltered.rodBind :+= RodBind("eval", "VCF", variantFilter.out) + evalFiltered.eval :+= variantFilter.out evalFiltered.out = swapExt(variantFilter.out, "vcf", "eval") add(genotyper, evalUnfiltered) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala index 42f63e225..b4c5d91d3 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/RodBind.scala @@ -1,7 +1,7 @@ package org.broadinstitute.sting.queue.extensions.gatk import java.io.File -import org.broadinstitute.sting.queue.function.FileExtension +import org.broadinstitute.sting.queue.util.FileExtension import java.lang.String /** diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/TaggedFile.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/TaggedFile.scala index ed8158b49..b19f9e430 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/TaggedFile.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/TaggedFile.scala @@ -1,7 +1,7 @@ package org.broadinstitute.sting.queue.extensions.gatk import java.io.File -import org.broadinstitute.sting.queue.function.FileExtension +import org.broadinstitute.sting.queue.util.FileExtension /** * Used to provide tagged -I input_file arguments to the GATK. diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala index c905581fa..500f7b200 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/function/QFunction.scala @@ -387,25 +387,11 @@ trait QFunction extends Logging with QJobReport { */ protected def canon(value: Any) = { value match { - case fileExtension: FileExtension => - val newFile = absolute(fileExtension); - val newFileExtension = fileExtension.withPath(newFile.getPath) - newFileExtension - case file: File => - if (file.getClass != classOf[File]) - throw new QException("Extensions of file must also extend with FileExtension so that the path can be modified."); - absolute(file) + case file: File => IOUtils.absolute(commandDirectory, file) case x => x } } - /** - * Returns the absolute path to the file relative to the run directory and the job command directory. - * @param file File to root relative to the command directory if it is not already absolute. - * @return The absolute path to file. - */ - private def absolute(file: File) = IOUtils.absolute(commandDirectory, file) - /** * Scala sugar type for checking annotation required and exclusiveOf. */ diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/FileExtension.scala b/public/scala/src/org/broadinstitute/sting/queue/util/FileExtension.scala similarity index 89% rename from public/scala/src/org/broadinstitute/sting/queue/function/FileExtension.scala rename to public/scala/src/org/broadinstitute/sting/queue/util/FileExtension.scala index e2394a5bf..9b6e52c8e 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/function/FileExtension.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/FileExtension.scala @@ -1,4 +1,4 @@ -package org.broadinstitute.sting.queue.function +package org.broadinstitute.sting.queue.util import java.io.File diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala index 79ffa8cb9..b17ccc0d5 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/IOUtils.scala @@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.util import org.apache.commons.io.FileUtils import java.io.{FileReader, File} import org.broadinstitute.sting.utils.exceptions.UserException +import org.broadinstitute.sting.queue.QException /** * A collection of utilities for modifying java.io. @@ -12,7 +13,7 @@ object IOUtils extends Logging { * Checks if the temp directory has been setup and throws an exception if they user hasn't set it correctly. * @param tempDir Temporary directory. */ - def checkTempDir(tempDir: File) = { + def checkTempDir(tempDir: File) { val tempDirPath = tempDir.getAbsolutePath // Keeps the user from leaving the temp directory as the default, and on Macs from having pluses // in the path which can cause problems with the Google Reflections library. @@ -20,7 +21,7 @@ object IOUtils extends Logging { if (tempDirPath.startsWith("/var/folders/") || (tempDirPath == "/tmp") || (tempDirPath == "/tmp/")) throw new UserException.BadTmpDir("java.io.tmpdir must be explicitly set") if (!tempDir.exists && !tempDir.mkdirs) - throw new UserException.BadTmpDir("Could not create directory: " + tempDir.getAbsolutePath()) + throw new UserException.BadTmpDir("Could not create directory: " + tempDir.getAbsolutePath) } /** @@ -35,9 +36,9 @@ object IOUtils extends Logging { throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent) val temp = File.createTempFile(prefix + "-", suffix, tempDirParent) if (!temp.delete) - throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath()) + throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath) if (!temp.mkdir) - throw new UserException.BadTmpDir("Could not create sub directory: " + temp.getAbsolutePath()) + throw new UserException.BadTmpDir("Could not create sub directory: " + temp.getAbsolutePath) absolute(temp) } @@ -46,7 +47,7 @@ object IOUtils extends Logging { * @param file File to write to. * @param content Content to write. */ - def writeContents(file: File, content: String) = FileUtils.writeStringToFile(file, content) + def writeContents(file: File, content: String) { FileUtils.writeStringToFile(file, content) } /** * Reads content of a file into a string. @@ -146,10 +147,12 @@ object IOUtils extends Logging { * @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path. */ def absolute(parent: File, file: File): File = { - if (file.isAbsolute) - absolute(file) - else - absolute(new File(parent, file.getPath)) + val newPath = + if (file.isAbsolute) + absolutePath(file) + else + absolutePath(new File(parent, file.getPath)) + replacePath(file, newPath) } /** @@ -159,12 +162,16 @@ object IOUtils extends Logging { * @return the absolute path to the file. */ def absolute(file: File) = { + replacePath(file, absolutePath(file)) + } + + private def absolutePath(file: File) = { var fileAbs = file.getAbsoluteFile var names = List.empty[String] while (fileAbs != null) { val name = fileAbs.getName fileAbs = fileAbs.getParentFile - + if (name == ".") { /* skip */ @@ -190,7 +197,18 @@ object IOUtils extends Logging { } } - new File(names.mkString("/", "/", "")) + names.mkString("/", "/", "") + } + + private def replacePath(file: File, path: String) = { + file match { + case fileExtension: FileExtension => + fileExtension.withPath(path) + case file: File => + if (file.getClass != classOf[File]) + throw new QException("Sub classes of java.io.File must also implement FileExtension so that the path can be modified.") + new File(path) + } } /**- -- - - - -- -