Merge branch 'master' into sgintervals

This commit is contained in:
Mark DePristo 2011-09-19 09:50:19 -04:00
commit 6ea57bf036
72 changed files with 1281 additions and 1756 deletions

View File

@ -821,6 +821,7 @@
<!-- TEST -->
<macrodef name="run-test">
<attribute name="testtype"/>
<attribute name="outputdir"/>
<attribute name="runfailed"/>
<sequential>
@ -828,10 +829,6 @@
<equals arg1="@{runfailed}" arg2="true"/>
</condition>
<condition property="test.output.dir" value="${report}/@{testtype}" else="${report}/failed_rerun">
<not><isset property="run.failed.tests"/></not>
</condition>
<!-- Get the pipeline run type. Default to dry. -->
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
@ -841,10 +838,10 @@
<isset property="include.contracts" />
</condition>
<mkdir dir="${test.output.dir}"/>
<mkdir dir="@{outputdir}"/>
<echo message="Sting: Running @{testtype} test cases!"/>
<taskdef resource="testngtasks" classpath="${testng.jar}"/>
<testng outputDir="${test.output.dir}"
<testng outputDir="@{outputdir}"
haltOnFailure="false" failureProperty="test.failure"
verbose="2"
workingDir="${basedir}"
@ -885,23 +882,35 @@
</testng>
<!-- generate a report for Bamboo or Hudson to read in -->
<junitreport todir="${test.output.dir}">
<fileset dir="${test.output.dir}">
<junitreport todir="@{outputdir}">
<fileset dir="@{outputdir}">
<include name="*/*.xml"/>
</fileset>
<report format="noframes" todir="${test.output.dir}"/>
<report format="noframes" todir="@{outputdir}"/>
</junitreport>
<fail message="test failed" if="test.failure" />
</sequential>
</macrodef>
<target name="alltests">
<antcall target="test" inheritAll="false"/>
<antcall target="integrationtest" inheritAll="false"/>
<antcall target="pipelinetest" inheritAll="false"/>
</target>
<target name="alltests.public">
<antcall target="test.public" inheritAll="false"/>
<antcall target="integrationtest.public" inheritAll="false"/>
<antcall target="pipelinetest.public" inheritAll="false"/>
</target>
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
<condition property="ttype" value="*UnitTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${ttype}" runfailed="false"/>
<run-test testtype="${ttype}" outputdir="${report}/${ttype}" runfailed="false"/>
</target>
<target name="test.public" depends="init.buildpublic,test"/>
@ -909,7 +918,7 @@
<condition property="itype" value="*IntegrationTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${itype}" runfailed="false"/>
<run-test testtype="${itype}" outputdir="${report}/${itype}" runfailed="false"/>
</target>
<target name="integrationtest.public" depends="init.buildpublic,integrationtest"/>
@ -917,7 +926,7 @@
<condition property="ptype" value="*PerformanceTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${ptype}" runfailed="false"/>
<run-test testtype="${ptype}" outputdir="${report}/${ptype}" runfailed="false"/>
</target>
<target name="performancetest.public" depends="init.buildpublic,performancetest" />
@ -925,7 +934,7 @@
<condition property="pipetype" value="*PipelineTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${pipetype}" runfailed="false"/>
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
</target>
<target name="pipelinetest.public" depends="init.buildpublic,pipelinetest" />
@ -934,24 +943,24 @@
<condition property="pipetype" value="*PipelineTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${pipetype}" runfailed="false"/>
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
</target>
<target name="pipelinetestrun.public" depends="init.buildpublic,pipelinetestrun" />
<target name="failed-test" depends="init.buildall,test.compile">
<run-test testtype="${report}/*UnitTest/testng-failed.xml" runfailed="true"/>
<run-test testtype="${report}/*UnitTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-integration" depends="init.buildall,test.compile">
<run-test testtype="${report}/*IntegrationTest/testng-failed.xml" runfailed="true"/>
<run-test testtype="${report}/*IntegrationTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-performance" depends="init.buildall,test.compile">
<run-test testtype="${report}/*PerformanceTest/testng-failed.xml" runfailed="true"/>
<run-test testtype="${report}/*PerformanceTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-pipeline" depends="init.buildall,test.compile">
<run-test testtype="${report}/*PipelineTest/testng-failed.xml" runfailed="true"/>
<run-test testtype="${report}/*PipelineTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<!-- ******************************************************************************** -->

View File

@ -142,6 +142,8 @@ print(paste("Project :", inputFileName))
convertUnits <- function(gatkReportData) {
convertGroup <- function(g) {
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS
g
}
lapply(gatkReportData, convertGroup)

View File

@ -379,7 +379,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
if ( tribbleType == null )
if ( ! file.canRead() | !! file.isFile() ) {
if ( ! file.canRead() | ! file.isFile() ) {
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
} else {
throw new UserException.CommandLineException(

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.GATKChunk;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@ -84,7 +85,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
if(currentLocus == GenomeLoc.UNMAPPED) {
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
for(SAMReaderID id: dataSource.getReaderIDs())
nextFilePointer.addFileSpans(id,new GATKBAMFileSpan());
nextFilePointer.addFileSpans(id,new GATKBAMFileSpan(new GATKChunk(indexFiles.get(id).getStartOfLastLinearBin(),Long.MAX_VALUE)));
currentLocus = null;
continue;
}

View File

@ -215,6 +215,45 @@ public class GATKBAMIndex {
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
}
/**
* Use to get close to the unmapped reads at the end of a BAM file.
* @return The file offset of the first record in the last linear bin, or -1
* if there are no elements in linear bins (i.e. no mapped reads).
*/
public long getStartOfLastLinearBin() {
openIndexFile();
seek(4);
final int sequenceCount = readInteger();
// Because no reads may align to the last sequence in the sequence dictionary,
// grab the last element of the linear index for each sequence, and return
// the last one from the last sequence that has one.
long lastLinearIndexPointer = -1;
for (int i = 0; i < sequenceCount; i++) {
// System.out.println("# Sequence TID: " + i);
final int nBins = readInteger();
// System.out.println("# nBins: " + nBins);
for (int j1 = 0; j1 < nBins; j1++) {
// Skip bin #
skipBytes(4);
final int nChunks = readInteger();
// Skip chunks
skipBytes(16 * nChunks);
}
final int nLinearBins = readInteger();
if (nLinearBins > 0) {
// Skip to last element of list of linear bins
skipBytes(8 * (nLinearBins - 1));
lastLinearIndexPointer = readLongs(1)[0];
}
}
closeIndexFile();
return lastLinearIndexPointer;
}
/**
* Gets the possible number of bins for a given reference sequence.
* @return How many bins could possibly be used according to this indexing scheme to index a single contig.

View File

@ -134,24 +134,11 @@ public class ReadShardStrategy implements ShardStrategy {
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
while(selectedReaders.size() == 0 && currentFilePointer != null) {
shardPosition = currentFilePointer.fileSpans;
for(SAMReaderID id: shardPosition.keySet()) {
// If the region contains location information (in other words, it is not at
// the start of the unmapped region), add the region.
if(currentFilePointer.isRegionUnmapped) {
// If the region is unmapped and no location data exists, add a null as an indicator to
// start at the next unmapped region.
if(!isIntoUnmappedRegion) {
selectedReaders.put(id,null);
isIntoUnmappedRegion = true;
}
else
selectedReaders.put(id,position.get(id));
}
else {
SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
if(!fileSpan.isEmpty())
selectedReaders.put(id,fileSpan);
}
SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
if(!fileSpan.isEmpty())
selectedReaders.put(id,fileSpan);
}
if(selectedReaders.size() > 0) {

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
* @version 0.1
*/
public class PlatformFilter extends ReadFilter {
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
protected String[] PLFilterNames;
public boolean filterOut(SAMRecord rec) {

View File

@ -68,6 +68,13 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
* -I input1.bam \
* -I input2.bam \
* --read_filter MappingQualityZero
*
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T PrintReads \
* -o output.bam \
* -I input.bam \
* -n 2000
* </pre>
*
*/

View File

@ -24,7 +24,9 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -32,10 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -46,134 +45,429 @@ import java.util.*;
* (http://snpeff.sourceforge.net/).
*
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
* output file (which must be provided on the command line via --snpEffFile:SnpEff <filename>),
* output file (which must be provided on the command line via --snpEffFile filename.vcf),
* and adds annotations on that effect.
*
* The possible biological effects and their associated impacts are defined in the class:
* org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants
*
* @author David Roazen
*/
public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
// SnpEff annotation key names:
public static final String GENE_ID_KEY = "GENE_ID";
public static final String GENE_NAME_KEY = "GENE_NAME";
public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
public static final String EXON_ID_KEY = "EXON_ID";
public static final String EXON_RANK_KEY = "EXON_RANK";
public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE";
public static final String EFFECT_KEY = "EFFECT";
public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT";
public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA";
public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON";
public static final String CODON_NUM_KEY = "CODON_NUM";
public static final String CDS_SIZE_KEY = "CDS_SIZE";
private static Logger logger = Logger.getLogger(SnpEff.class);
// We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
// field annotation with the key EFF:
public static final String SNPEFF_INFO_FIELD_KEY = "EFF";
public static final String SNPEFF_EFFECT_METADATA_DELIMITER = "[()]";
public static final String SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER = "\\|";
// Key names for the INFO field annotations we will add to each record, along
// with parsing-related information:
public enum InfoFieldKey {
EFFECT_KEY ("SNPEFF_EFFECT", -1),
IMPACT_KEY ("SNPEFF_IMPACT", 0),
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 1),
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 2),
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
EXON_ID_KEY ("SNPEFF_EXON_ID", 7);
// Actual text of the key
private final String keyName;
// Index within the effect metadata subfields from the SnpEff EFF annotation
// where each key's associated value can be found during parsing.
private final int fieldIndex;
InfoFieldKey ( String keyName, int fieldIndex ) {
this.keyName = keyName;
this.fieldIndex = fieldIndex;
}
public String getKeyName() {
return keyName;
}
public int getFieldIndex() {
return fieldIndex;
}
}
// Possible SnpEff biological effects. All effect names found in the SnpEff input file
// are validated against this list.
public enum EffectType {
NONE,
CHROMOSOME,
INTERGENIC,
UPSTREAM,
UTR_5_PRIME,
UTR_5_DELETED,
START_GAINED,
SPLICE_SITE_ACCEPTOR,
SPLICE_SITE_DONOR,
START_LOST,
SYNONYMOUS_START,
NON_SYNONYMOUS_START,
CDS,
GENE,
TRANSCRIPT,
EXON,
EXON_DELETED,
NON_SYNONYMOUS_CODING,
SYNONYMOUS_CODING,
FRAME_SHIFT,
CODON_CHANGE,
CODON_INSERTION,
CODON_CHANGE_PLUS_CODON_INSERTION,
CODON_DELETION,
CODON_CHANGE_PLUS_CODON_DELETION,
STOP_GAINED,
SYNONYMOUS_STOP,
NON_SYNONYMOUS_STOP,
STOP_LOST,
INTRON,
UTR_3_PRIME,
UTR_3_DELETED,
DOWNSTREAM,
INTRON_CONSERVED,
INTERGENIC_CONSERVED,
REGULATION,
CUSTOM,
WITHIN_NON_CODING_GENE
}
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact.
public enum EffectImpact {
LOW (1),
MODERATE (2),
HIGH (3);
private final int severityRating;
EffectImpact ( int severityRating ) {
this.severityRating = severityRating;
}
public boolean isHigherImpactThan ( EffectImpact other ) {
return this.severityRating > other.severityRating;
}
}
// SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information.
public enum EffectCoding {
CODING,
NON_CODING,
UNKNOWN
}
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
validateRodBinding(walker.getSnpEffRodBinding());
checkSnpEffVersion(walker, toolkit);
}
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
RodBinding<SnpEffFeature> snpEffRodBinding = walker.getSnpEffRodBinding();
validateRodBinding(snpEffRodBinding);
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
List<SnpEffFeature> features = tracker.getValues(snpEffRodBinding, ref.getLocus());
// Get only SnpEff records that start at this locus, not merely span it:
List<VariantContext> snpEffRecords = tracker.getValues(snpEffRodBinding, ref.getLocus());
// Add only annotations for one of the most biologically-significant effects as defined in
// the SnpEffConstants class:
SnpEffFeature mostSignificantEffect = getMostSignificantEffect(features);
if ( mostSignificantEffect == null ) {
// Within this set, look for a SnpEff record whose ref/alt alleles match the record to annotate.
// If there is more than one such record, we only need to pick the first one, since the biological
// effects will be the same across all such records:
VariantContext matchingRecord = getMatchingSnpEffRecord(snpEffRecords, vc);
if ( matchingRecord == null ) {
return null;
}
return generateAnnotations(mostSignificantEffect);
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
if ( effects.size() == 0 ) {
return null;
}
// Add only annotations for one of the most biologically-significant effects from this set:
SnpEffEffect mostSignificantEffect = getMostSignificantEffect(effects);
return mostSignificantEffect.getAnnotations();
}
private void validateRodBinding ( RodBinding<SnpEffFeature> snpEffRodBinding ) {
private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
throw new UserException("The SnpEff annotator requires that a SnpEff output file be provided " +
"as a rodbinding on the command line, but no SnpEff rodbinding was found.");
throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
"as a rodbinding on the command line via the --snpEffFile option, but " +
"no SnpEff rodbinding was found.");
}
}
private SnpEffFeature getMostSignificantEffect ( List<SnpEffFeature> snpEffFeatures ) {
SnpEffFeature mostSignificantEffect = null;
private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
for ( SnpEffFeature snpEffFeature : snpEffFeatures ) {
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
}
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
if ( ! isSupportedSnpEffVersion(snpEffVersionString) ) {
throw new UserException("The version of SnpEff used to generate the SnpEff input file (" + snpEffVersionString + ") " +
"is not currently supported by the GATK. Supported versions are: " + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS));
}
}
private boolean isSupportedSnpEffVersion ( String versionString ) {
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
if ( supportedVersion.equals(versionString) ) {
return true;
}
}
return false;
}
private VariantContext getMatchingSnpEffRecord ( List<VariantContext> snpEffRecords, VariantContext vc ) {
for ( VariantContext snpEffRecord : snpEffRecords ) {
if ( snpEffRecord.hasSameAlternateAllelesAs(vc) && snpEffRecord.getReference().equals(vc.getReference()) ) {
return snpEffRecord;
}
}
return null;
}
private List<SnpEffEffect> parseSnpEffRecord ( VariantContext snpEffRecord ) {
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
List<String> individualEffects;
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
if ( effectFieldValue instanceof List ) {
individualEffects = (List<String>)effectFieldValue;
}
else {
individualEffects = Arrays.asList((String)effectFieldValue);
}
for ( String effectString : individualEffects ) {
String[] effectNameAndMetadata = effectString.split(SNPEFF_EFFECT_METADATA_DELIMITER);
if ( effectNameAndMetadata.length != 2 ) {
logger.warn(String.format("Malformed SnpEff effect field at %s:%d, skipping: %s",
snpEffRecord.getChr(), snpEffRecord.getStart(), effectString));
continue;
}
String effectName = effectNameAndMetadata[0];
String[] effectMetadata = effectNameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1);
SnpEffEffect parsedEffect = new SnpEffEffect(effectName, effectMetadata);
if ( parsedEffect.isWellFormed() ) {
parsedEffects.add(parsedEffect);
}
else {
logger.warn(String.format("Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"",
snpEffRecord.getChr(), snpEffRecord.getStart(), parsedEffect.getParseError(), effectString));
}
}
return parsedEffects;
}
private SnpEffEffect getMostSignificantEffect ( List<SnpEffEffect> effects ) {
SnpEffEffect mostSignificantEffect = null;
for ( SnpEffEffect effect : effects ) {
if ( mostSignificantEffect == null ||
snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
effect.isHigherImpactThan(mostSignificantEffect) ) {
mostSignificantEffect = snpEffFeature;
mostSignificantEffect = effect;
}
}
return mostSignificantEffect;
}
private Map<String, Object> generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(getKeyNames().size()));
if ( mostSignificantEffect.hasGeneID() )
annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID());
if ( mostSignificantEffect.hasGeneName() )
annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName());
if ( mostSignificantEffect.hasTranscriptID() )
annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
if ( mostSignificantEffect.hasExonID() )
annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID());
if ( mostSignificantEffect.hasExonRank() )
annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank()));
if ( mostSignificantEffect.isNonCodingGene() )
annotations.put(WITHIN_NON_CODING_GENE_KEY, null);
annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());
if ( mostSignificantEffect.hasEffectExtraInformation() )
annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
if ( mostSignificantEffect.hasOldAndNewAA() )
annotations.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
if ( mostSignificantEffect.hasOldAndNewCodon() )
annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
if ( mostSignificantEffect.hasCodonNum() )
annotations.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum()));
if ( mostSignificantEffect.hasCdsSize() )
annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize()));
return annotations;
}
public List<String> getKeyNames() {
return Arrays.asList( GENE_ID_KEY,
GENE_NAME_KEY,
TRANSCRIPT_ID_KEY,
EXON_ID_KEY,
EXON_RANK_KEY,
WITHIN_NON_CODING_GENE_KEY,
EFFECT_KEY,
EFFECT_IMPACT_KEY,
EFFECT_EXTRA_INFORMATION_KEY,
OLD_NEW_AA_KEY,
OLD_NEW_CODON_KEY,
CODON_NUM_KEY,
CDS_SIZE_KEY
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
InfoFieldKey.IMPACT_KEY.getKeyName(),
InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
InfoFieldKey.EXON_ID_KEY.getKeyName()
);
}
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(
new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If this flag is present, the highest-impact effect resulting from the current variant is within a non-coding gene"),
new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size for the highest-impact effect resulting from the current variant")
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
);
}
/**
* Helper class to parse, validate, and store a single SnpEff effect and its metadata.
*/
protected static class SnpEffEffect {
private EffectType effect;
private EffectImpact impact;
private String codonChange;
private String aminoAcidChange;
private String geneName;
private String geneBiotype;
private EffectCoding coding;
private String transcriptID;
private String exonID;
private String parseError = null;
private boolean isWellFormed = true;
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
// Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
// errors come after warnings, not vice versa:
private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
private static final int SNPEFF_CODING_FIELD_INDEX = 5;
public SnpEffEffect ( String effectName, String[] effectMetadata ) {
parseEffectName(effectName);
parseEffectMetadata(effectMetadata);
}
private void parseEffectName ( String effectName ) {
try {
effect = EffectType.valueOf(effectName);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("%s is not a recognized effect type", effectName));
}
}
private void parseEffectMetadata ( String[] effectMetadata ) {
if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
}
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
}
else {
parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
EXPECTED_NUMBER_OF_METADATA_FIELDS, effectMetadata.length));
}
return;
}
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
}
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];
if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
try {
coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect coding: %s", effectMetadata[SNPEFF_CODING_FIELD_INDEX]));
}
}
else {
coding = EffectCoding.UNKNOWN;
}
transcriptID = effectMetadata[InfoFieldKey.TRANSCRIPT_ID_KEY.getFieldIndex()];
exonID = effectMetadata[InfoFieldKey.EXON_ID_KEY.getFieldIndex()];
}
private void parseError ( String message ) {
isWellFormed = false;
// Cache only the first error encountered:
if ( parseError == null ) {
parseError = message;
}
}
public boolean isWellFormed() {
return isWellFormed;
}
public String getParseError() {
return parseError == null ? "" : parseError;
}
public boolean isCoding() {
return coding == EffectCoding.CODING;
}
public boolean isHigherImpactThan ( SnpEffEffect other ) {
// If one effect is within a coding gene and the other is not, the effect that is
// within the coding gene has higher impact:
if ( isCoding() && ! other.isCoding() ) {
return true;
}
else if ( ! isCoding() && other.isCoding() ) {
return false;
}
// Otherwise, both effects are either in or not in a coding gene, so we compare the impacts
// of the effects themselves:
return impact.isHigherImpactThan(other.impact);
}
/**
 * Packages this effect's parsed fields as VCF INFO-field annotations.
 *
 * Keys are the InfoFieldKey names; entries with empty values are omitted
 * (see addAnnotation). Insertion order is preserved by the LinkedHashMap
 * so annotations are emitted in a stable, predictable order.
 *
 * @return an ordered map of annotation key names to their values
 */
public Map<String, Object> getAnnotations() {
    final Map<String, Object> result =
        new LinkedHashMap<String, Object>(Utils.optimumHashSize(InfoFieldKey.values().length));

    addAnnotation(result, InfoFieldKey.EFFECT_KEY.getKeyName(),            effect.toString());
    addAnnotation(result, InfoFieldKey.IMPACT_KEY.getKeyName(),            impact.toString());
    addAnnotation(result, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),      codonChange);
    addAnnotation(result, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
    addAnnotation(result, InfoFieldKey.GENE_NAME_KEY.getKeyName(),         geneName);
    addAnnotation(result, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),      geneBiotype);
    addAnnotation(result, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),     transcriptID);
    addAnnotation(result, InfoFieldKey.EXON_ID_KEY.getKeyName(),           exonID);

    return result;
}
/**
 * Adds a single annotation entry, skipping keys whose values are blank.
 *
 * Null or whitespace-only values are dropped so that empty SnpEff fields
 * never appear as annotations in the output.
 *
 * @param annotations map to add the entry to
 * @param keyName     annotation key name
 * @param keyValue    annotation value; ignored when null or blank
 */
private void addAnnotation ( Map<String, Object> annotations, String keyName, String keyValue ) {
    // Guard: nothing to record for a null or whitespace-only value.
    if ( keyValue == null || keyValue.trim().length() == 0 ) {
        return;
    }
    annotations.put(keyName, keyValue);
}
}
}

View File

@ -40,7 +40,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -86,14 +85,15 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
public RodBinding<VariantContext> getVariantRodBinding() { return variantCollection.variants; }
/**
* The INFO field will be annotated with information on the most biologically-significant effect
* listed in the SnpEff output file for each variant.
*/
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
public RodBinding<SnpEffFeature> snpEffFile;
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return snpEffFile; }
public RodBinding<VariantContext> snpEffFile;
public RodBinding<VariantContext> getSnpEffRodBinding() { return snpEffFile; }
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
@ -203,11 +203,13 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
}
if ( USE_ALL_ANNOTATIONS )
engine = new VariantAnnotatorEngine(this);
engine = new VariantAnnotatorEngine(this, getToolkit());
else
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this);
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
engine.initializeExpressions(expressionsToUse);
engine.invokeAnnotationInitializationMethods();
// setup the header fields
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();

View File

@ -26,13 +26,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
@ -49,6 +47,7 @@ public class VariantAnnotatorEngine {
private HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
private AnnotatorCompatibleWalker walker;
private GenomeAnalysisEngine toolkit;
private static class VAExpression {
@ -74,16 +73,18 @@ public class VariantAnnotatorEngine {
}
// use this constructor if you want all possible annotations
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker) {
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
this.toolkit = toolkit;
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
initializeDBs();
}
// use this constructor if you want to select specific annotations (and/or interfaces)
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker) {
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
this.toolkit = toolkit;
initializeAnnotations(annotationGroupsToUse, annotationsToUse);
initializeDBs();
}
@ -113,6 +114,16 @@ public class VariantAnnotatorEngine {
dbAnnotations.put(rod, rod.getName());
}
public void invokeAnnotationInitializationMethods() {
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
annotation.initialize(walker, toolkit);
}
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
annotation.initialize(walker, toolkit);
}
}
public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;
@ -9,8 +8,9 @@ import java.util.List;
public interface AnnotatorCompatibleWalker {
// getter methods for various used bindings
public abstract RodBinding<SnpEffFeature> getSnpEffRodBinding();
public abstract RodBinding<VariantContext> getVariantRodBinding();
public abstract RodBinding<VariantContext> getSnpEffRodBinding();
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
}
}

View File

@ -24,18 +24,16 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;
import java.util.Map;
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
public abstract class VariantAnnotatorAnnotation {
// return the INFO keys
public abstract List<String> getKeyNames();
// initialization method (optional for subclasses, and therefore non-abstract)
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { }
}

View File

@ -175,21 +175,16 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
}
BeagleFeature beagleR2Feature = tracker.getFirstValue(beagleR2);
// ignore places where we don't have a variant
if ( beagleR2Feature == null )
return 0;
BeagleFeature beagleProbsFeature = tracker.getFirstValue(beagleProbs);
// ignore places where we don't have a variant
if ( beagleProbsFeature == null )
return 0;
BeagleFeature beaglePhasedFeature = tracker.getFirstValue(beaglePhased);
// ignore places where we don't have a variant
if ( beaglePhasedFeature == null )
return 0;
if ( beagleR2Feature == null || beagleProbsFeature == null || beaglePhasedFeature == null)
{
vcfWriter.add(vc_input);
return 1;
}
// get reference base for current position
byte refByte = ref.getBase();

View File

@ -69,14 +69,23 @@ import java.util.*;
* <h2>Output</h2>
* <p>
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
* </p><p>
* - no suffix: per locus coverage
* </p><p>
* - _summary: total, mean, median, quartiles, and threshold proportions, aggregated over all bases
* </p><p>
* - _statistics: coverage histograms (# locus with X coverage), aggregated over all bases
* </p><p>
* - _interval_summary: total, mean, median, quartiles, and threshold proportions, aggregated per interval
* </p><p>
* - _interval_statistics: 2x2 table of # of intervals covered to >= X depth in >=Y samples
* </p><p>
* - _gene_summary: total, mean, median, quartiles, and threshold proportions, aggregated per gene
* </p><p>
* - _gene_statistics: 2x2 table of # of genes covered to >= X depth in >= Y samples
* </p><p>
* - _cumulative_coverage_counts: coverage histograms (# locus with >= X coverage), aggregated over all bases
* </p><p>
 * - _cumulative_coverage_proportions: proportions of loci with >= X coverage, aggregated over all bases
* </p>
*

View File

@ -43,8 +43,10 @@ import java.util.List;
* Generates an alternative reference sequence over the specified interval.
*
* <p>
* Given variant ROD tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for a "snpmask" ROD to set overlapping bases to 'N'.
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
* Note that if there are multiple variants at a site, it takes the first one seen.
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
*
* <h2>Input</h2>
* <p>

View File

@ -42,6 +42,9 @@ import java.io.PrintStream;
*
* <p>
* The output format can be partially controlled using the provided command-line arguments.
* Specify intervals with the usual -L argument to output only the reference bases within your intervals.
* Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
* separate fasta sequence (named numerically in order).
*
* <h2>Input</h2>
* <p>

View File

@ -63,7 +63,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private boolean SIMPLE_GREEDY_GENOTYPER = false;
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
final private ExactCalculation calcToUse;
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
@ -178,22 +178,25 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
private static final double[][] getGLs(Map<String, Genotype> GLs) {
double[][] genotypeLikelihoods = new double[GLs.size()+1][];
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
int j = 0;
//int j = 0;
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.values() ) {
j++;
if ( sample.hasLikelihoods() ) {
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
genotypeLikelihoods[j] = sample.getLikelihoods().getAsVector();
double[] gls = sample.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
genotypeLikelihoods.add(gls);
}
}
return genotypeLikelihoods;
}
// -------------------------------------------------------------------------------------
//
// Linearized, ~O(N), implementation.
@ -318,9 +321,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
public int linearExact(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
final int numSamples = GLs.size();
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
final int numSamples = genotypeLikelihoods.size()-1;
final int numChr = 2*numSamples;
final double[][] genotypeLikelihoods = getGLs(GLs);
final ExactACCache logY = new ExactACCache(numSamples+1);
logY.getkMinus0()[0] = 0.0; // the zero case
@ -334,14 +337,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( k == 0 ) { // special case for k = 0
for ( int j=1; j <= numSamples; j++ ) {
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][idxAA];
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA];
}
} else { // k > 0
final double[] kMinus1 = logY.getkMinus1();
final double[] kMinus2 = logY.getkMinus2();
for ( int j=1; j <= numSamples; j++ ) {
final double[] gl = genotypeLikelihoods[j];
final double[] gl = genotypeLikelihoods.get(j);
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
double aa = Double.NEGATIVE_INFINITY;
@ -434,10 +437,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( !vc.isVariant() )
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
boolean multiAllelicRecord = false;
if (vc.getAlternateAlleles().size() > 1)
multiAllelicRecord = true;
Map<String, Genotype> GLs = vc.getGenotypes();
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
@ -454,7 +453,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
pathMetricArray[0][0] = 0.0;
// todo = can't deal with optimal dynamic programming solution with multiallelic records
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord) {
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
sampleIndices.addAll(GLs.keySet());
sampleIdx = GLs.size();
}
@ -465,6 +464,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
continue;
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) {
//System.out.print(sample.getKey()+":");
//for (int k=0; k < likelihoods.length; k++)
// System.out.format("%4.2f ",likelihoods[k]);
//System.out.println();
// all likelihoods are essentially the same: skip this sample and will later on force no call.
//sampleIdx++;
continue;
}
sampleIndices.add(sample.getKey());
for (int k=0; k <= AFofMaxLikelihood; k++) {
@ -504,22 +514,25 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
Genotype g = GLs.get(sample);
if ( !g.hasLikelihoods() )
continue;
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord)
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
else {
int newIdx = tracebackArray[k][startIdx];
bestGTguess = startIdx - newIdx;
startIdx = newIdx;
}
// if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now,
// and will add no-call genotype to GL's in a second pass
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
double qual = Double.NEGATIVE_INFINITY;
double[] likelihoods = g.getLikelihoods().getAsVector();
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
}
else {
int newIdx = tracebackArray[k][startIdx];;
bestGTguess = startIdx - newIdx;
startIdx = newIdx;
}
/* System.out.format("Sample: %s GL:",sample);
for (int i=0; i < likelihoods.length; i++)
System.out.format("%1.4f ",likelihoods[i]);
System.out.format("%1.4f, ",likelihoods[i]);
*/
for (int i=0; i < likelihoods.length; i++) {
@ -570,6 +583,25 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
if ( !sample.getValue().hasLikelihoods() )
continue;
Genotype g = GLs.get(sample.getKey());
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
continue; // regular likelihoods
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
double qual = Genotype.NO_NEG_LOG_10PERROR;
myAlleles.add(Allele.NO_CALL);
myAlleles.add(Allele.NO_CALL);
//System.out.println(myAlleles.toString());
calls.put(sample.getKey(), new Genotype(sample.getKey(), myAlleles, qual, null, g.getAttributes(), false));
}
return calls;
}

View File

@ -32,7 +32,9 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.genotype.Haplotype;
@ -413,16 +415,14 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
if (pileup != null ) {
double[] genotypeLikelihoods;
if (useOldWrongHorribleHackedUpLikelihoodModel)
genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
else
genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
// which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is ordered as for example
// for 3 alleles it's 00 01 11 02 12 22
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
alleleList,
genotypeLikelihoods,
getFilteredDepth(pileup)));
@ -444,4 +444,16 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return indelLikelihoodMap.get();
}
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
// so that per-sample DP will include deletions covering the event.
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;
for ( PileupElement p : pileup ) {
if (p.isDeletion() || BaseUtils.isRegularBase(p.getBase()) )
count++;
}
return count;
}
}

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;

View File

@ -38,7 +38,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -127,7 +126,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return null; }
public RodBinding<VariantContext> getVariantRodBinding() { return null; }
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
@ -210,7 +210,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this);
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
// initialize the header

View File

@ -68,26 +68,59 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
import java.util.*;
/**
* Tool for calling indels in Tumor-Normal paired sample mode; this tool supports single-sample mode as well,
 * but this latter functionality is now superseded by UnifiedGenotyper.
*
* <p>
* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferrably MSA cleaned) sequencing
* data. Two output formats supported are: BED format (minimal output, required), and extended output that includes read
and mismatch statistics around the calls (turned on with --verbose). The calls can be performed from a single/pooled sample,
* or from a matched pair of samples (with --somatic option). In the latter case, two input bam files must be specified,
* the order is important: indels are called from the second sample ("Tumor") and additionally annotated as germline
* if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
* data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
 * include additional statistics such as mismatches and base qualities around the calls, read strandedness (how many
* forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
* statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
* attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
* metrics for the post-processing tools to make the final decision). The calls are performed by default
* from a matched tumor-normal pair of samples. In this case, two (sets of) input bam files must be specified using tagged -I
* command line arguments: normal and tumor bam(s) must be passed with -I:normal and -I:tumor arguments,
* respectively. Indels are called from the tumor sample and annotated as germline
* if even a weak evidence for the same indel, not necessarily a confident call, exists in the normal sample, or as somatic
* if normal sample has coverage at the site but no indication for an indel. Note that strictly speaking the calling
* is not even attempted in normal sample: if there is an indel in normal that is not detected/does not pass a threshold
* in tumor sample, it will not be reported.
*
* <b>If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you,
* please email asivache at broadinstitute dot org and he will gladly explain everything in more detail.</b>
* To make indel calls and associated metrics for a single sample, this tool can be run with --unpaired flag (input
* bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
* on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
*
* <h2>Input</h2>
* <p>
* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
* </p>
*
* <h2>Output</h2>
* <p>
* Indel calls with associated metrics.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T SomaticIndelDetector \
* -o indels.vcf \
* -verbose indels.txt
* -I:normal normal.bam \
* -I:tumor tumor.bam
* </pre>
*
*/
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
// @Output
// PrintStream out;
@Output(doc="File to which variants should be written",required=true)
@Output(doc="File to write variants (indels) in VCF format",required=true)
protected VCFWriter vcf_writer = null;
@Argument(fullName="outputFile", shortName="O", doc="output file name (BED format). DEPRECATED> Use --bed", required=true)
@ -102,68 +135,80 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
@Hidden
@Argument(fullName = "genotype_intervals", shortName = "genotype",
doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or it's the ref", required = false)
doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or not", required = false)
public String genotypeIntervalsFile = null;
@Hidden
@Argument(fullName="genotypeIntervalsAreNotSorted", shortName="giNotSorted", required=false,
doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
"if the list turns out to be unsorted, it will throw an exception. "+
"Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
"to sort and keep it in memory (increases memory usage!).")
doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
"if the list turns out to be unsorted, it will throw an exception. "+
"Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
"to sort and keep it in memory (increases memory usage!).")
protected boolean GENOTYPE_NOT_SORTED = false;
@Hidden
@Argument(fullName="unpaired", shortName="unpaired",
doc="Perform unpaired calls (no somatic status detection)", required=false)
@Argument(fullName="unpaired", shortName="unpaired",
doc="Perform unpaired calls (no somatic status detection)", required=false)
boolean call_unpaired = false;
boolean call_somatic ;
boolean call_somatic ;
@Argument(fullName="verboseOutput", shortName="verbose",
doc="Verbose output file in text format", required=false)
java.io.File verboseOutput = null;
@Argument(fullName="verboseOutput", shortName="verbose",
doc="Verbose output file in text format", required=false)
java.io.File verboseOutput = null;
@Argument(fullName="bedOutput", shortName="bed",
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
java.io.File bedOutput = null;
@Argument(fullName="minCoverage", shortName="minCoverage",
doc="indel calls will be made only at sites with coverage of minCoverage or more reads; with --somatic this value is applied to tumor sample", required=false)
int minCoverage = 6;
@Argument(fullName="minCoverage", shortName="minCoverage",
doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
"with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false)
int minCoverage = 6;
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
doc="used only with --somatic; normal sample must have at least minNormalCoverage or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
int minNormalCoverage = 4;
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
doc="used only in default (somatic) mode; normal sample must have at least minNormalCoverage "+
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
int minNormalCoverage = 4;
@Argument(fullName="minFraction", shortName="minFraction",
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
double minFraction = 0.3;
@Argument(fullName="minFraction", shortName="minFraction",
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
double minFraction = 0.3;
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt all indel observations at the site exceeds this threshold", required=false)
double minConsensusFraction = 0.7;
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
"all indel observations at the site exceeds this threshold", required=false)
double minConsensusFraction = 0.7;
@Argument(fullName="minIndelCount", shortName="minCnt",
doc="Minimum count of reads supporting consensus indel required for making the call. "+
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
"(minIndelCount not met) will not pass.", required=false)
int minIndelCount = 0;
@Argument(fullName="minIndelCount", shortName="minCnt",
doc="Minimum count of reads supporting consensus indel required for making the call. "+
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
"(minIndelCount not met) will not pass.", required=false)
int minIndelCount = 0;
@Argument(fullName="refseq", shortName="refseq",
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
String RefseqFileName = null;
@Argument(fullName="refseq", shortName="refseq",
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with "+
"GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
String RefseqFileName = null;
@Argument(fullName="blacklistedLanes", shortName="BL",
doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
"by this application, so they will not contribute indels to consider and will not be counted.", required=false)
PlatformUnitFilterHelper dummy;
@Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false;
//@Argument(fullName="blacklistedLanes", shortName="BL",
// doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
// "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
//PlatformUnitFilterHelper dummy;
@Hidden
@Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",
required=false) Boolean DEBUG = false;
@Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
"May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200;
"May need to be increased to accomodate longer reads or longer deletions. A read can be fit into the "+
"window if its length on the reference (i.e. read length + length of deletion gap(s) if any) is smaller "+
"than the window size. Reads that do not fit will be ignored, so long deletions can not be called "+
"if window is too small",required=false) int WINDOW_SIZE = 200;
@Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
" the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
private WindowContext tumor_context;
private WindowContext normal_context;
private int currentContigIndex = -1;

View File

@ -76,6 +76,42 @@ import java.util.Map;
* <h2>Output</h2>
* <p>
* A recalibration table file in CSV format that is used by the TableRecalibration walker.
* It is a comma-separated text file relating the desired covariates to the number of such bases, their rate of mismatch in the genome, and the implied empirical quality score.
*
* The first 20 lines of such a file are shown below.
* * The file begins with a series of comment lines describing:
* ** The number of counted loci
* ** The number of counted bases
* ** The number of skipped loci and the fraction skipped, due to presence in dbSNP or bad reference bases
*
* * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.
*
* * After the header, data records occur one per line until the end of the file. The first several items on a line are the values of the individual covariates and will change
* depending on which covariates were specified at runtime. The last three items are the data &mdash; that is, the number of observations for this combination of covariates, the number of
* reference mismatches, and the raw empirical quality score calculated by phred-scaling the mismatch rate.
*
* <pre>
* # Counted Sites 19451059
* # Counted Bases 56582018
* # Skipped Sites 82666
* # Fraction Skipped 1 / 235 bp
* ReadGroup,QualityScore,Cycle,Dinuc,nObservations,nMismatches,Qempirical
* SRR006446,11,65,CA,9,1,10
* SRR006446,11,48,TA,10,0,40
* SRR006446,11,67,AA,27,0,40
* SRR006446,11,61,GA,11,1,10
* SRR006446,12,34,CA,47,1,17
* SRR006446,12,30,GA,52,1,17
* SRR006446,12,36,AA,352,1,25
* SRR006446,12,17,TA,182,11,12
* SRR006446,11,48,TG,2,0,40
* SRR006446,11,67,AG,1,0,40
* SRR006446,12,34,CG,9,0,40
* SRR006446,12,30,GG,43,0,40
* ERR001876,4,31,AG,1,0,40
* ERR001876,4,31,AT,2,2,1
* ERR001876,4,31,CA,1,0,40
* </pre>
* </p>
*
* <h2>Examples</h2>

View File

@ -61,7 +61,7 @@ import java.util.List;
* CACGTTCGGcttgtgcagagcctcaaggtcatccagaggtgatAGTTTAGGGCCCTCTCAAGTCTTTCCNGTGCGCATGG[GT/AC*]CAGCCCTGGGCACCTGTNNNNNNNNNNNNNTGCTCATGGCCTTCTAGATTCCCAGGAAATGTCAGAGCTTTTCAAAGCCC
*</pre>
* are amplicon sequences resulting from running the tool. The flags (preceding the sequence itself) can be:
*
*<pre>
* Valid // amplicon is valid
* SITE_IS_FILTERED=1 // validation site is not marked 'PASS' or '.' in its filter field ("you are trying to validate a filtered variant")
* VARIANT_TOO_NEAR_PROBE=1 // there is a variant too near to the variant to be validated, potentially shifting the mass-spec peak
@ -72,10 +72,10 @@ import java.util.List;
* END_TOO_CLOSE, // variant is too close to the end of the amplicon region to give sequenom a good chance to find a suitable primer
* NO_VARIANTS_FOUND, // no variants found within the amplicon region
* INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. an insertion directly preceding or following the site, or a deletion that spans the site itself)
* </p>
* </pre></p>
*
* <h2>Examples</h2>
* <pre></pre>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T ValidationAmplicons

View File

@ -55,7 +55,23 @@ import java.util.*;
*
* <h2>Output</h2>
* <p>
* Evaluation tables.
* Evaluation tables detailing the results of the eval modules which were applied.
* For example:
* <pre>
* output.eval.gatkreport:
* ##:GATKReport.v0.1 CountVariants : Counts different classes of variants in the sample
* CountVariants CompRod CpG EvalRod JexlExpression Novelty nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate ...
* CountVariants dbsnp CpG eval none all 65900028 135770 0 135770 0.00206024 ...
* CountVariants dbsnp CpG eval none known 65900028 47068 0 47068 0.00071423 ...
* CountVariants dbsnp CpG eval none novel 65900028 88702 0 88702 0.00134601 ...
* CountVariants dbsnp all eval none all 65900028 330818 0 330818 0.00502000 ...
* CountVariants dbsnp all eval none known 65900028 120685 0 120685 0.00183133 ...
* CountVariants dbsnp all eval none novel 65900028 210133 0 210133 0.00318866 ...
* CountVariants dbsnp non_CpG eval none all 65900028 195048 0 195048 0.00295976 ...
* CountVariants dbsnp non_CpG eval none known 65900028 73617 0 73617 0.00111710 ...
* CountVariants dbsnp non_CpG eval none novel 65900028 121431 0 121431 0.00184265 ...
* ...
* </pre>
* </p>
*
* <h2>Examples</h2>
@ -152,6 +168,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
private File ancestralAlignmentsFile = null;
@Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping", required=false)
private boolean requireStrictAlleleMatch = false;
// Variables
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
@ -360,16 +379,16 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if ( matchingComps.size() == 0 )
return null;
// find the comp which matches the alternate allele from eval
// find the comp which matches both the reference allele and alternate allele from eval
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
for ( VariantContext comp : matchingComps ) {
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp)) )
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
return comp;
}
// if none match, just return the first one
return matchingComps.get(0);
// if none match, just return the first one unless we require a strict match
return (requireStrictAlleleMatch ? null : matchingComps.get(0));
}
// Tree-reduce is not used by this walker (reduction is linear), so this is a deliberate no-op.
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }

View File

@ -22,9 +22,6 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
@DataPoint(description = "number of eval SNP sites")
long nEvalVariants = 0;
@DataPoint(description = "number of comp SNP sites")
long nCompVariants = 0;
@DataPoint(description = "number of eval sites outside of comp sites")
long novelSites = 0;
@ -76,9 +73,8 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean evalIsGood = eval != null && eval.isPolymorphic();
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
boolean compIsGood = comp != null && comp.isNotFiltered();
if (compIsGood) nCompVariants++; // count the number of comp events
if (evalIsGood) nEvalVariants++; // count the number of eval events
if (compIsGood && evalIsGood) {

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
@ -11,12 +12,19 @@ import java.util.List;
* Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
*/
public class FunctionalClass extends VariantStratifier {
// Functional annotation classes recognized by this stratifier. The enum constant
// names are matched (case-sensitively, via valueOf) against the values found in the
// refseq.functionalClass INFO-field annotations, so they are intentionally lowercase.
public enum FunctionalType {
silent,
missense,
nonsense
}
@Override
public void initialize() {
states.add("all");
states.add("silent");
states.add("missense");
states.add("nonsense");
for ( FunctionalType type : FunctionalType.values() )
states.add(type.name());
}
@ -26,10 +34,12 @@ public class FunctionalClass extends VariantStratifier {
relevantStates.add("all");
if (eval != null && eval.isVariant()) {
String type = null;
FunctionalType type = null;
if (eval.hasAttribute("refseq.functionalClass")) {
type = eval.getAttributeAsString("refseq.functionalClass");
try {
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass"));
} catch ( Exception e ) {} // don't error out if the type isn't supported
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
int annotationId = 1;
String key;
@ -37,24 +47,33 @@ public class FunctionalClass extends VariantStratifier {
do {
key = String.format("refseq.functionalClass_%d", annotationId);
String newtype = eval.getAttributeAsString(key);
if ( newtype != null && !newtype.equalsIgnoreCase("null") &&
( type == null ||
( type.equals("silent") && !newtype.equals("silent") ) ||
( type.equals("missense") && newtype.equals("nonsense") ) )
) {
type = newtype;
String newtypeStr = eval.getAttributeAsString(key);
if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
try {
FunctionalType newType = FunctionalType.valueOf(newtypeStr);
if ( type == null ||
( type == FunctionalType.silent && newType != FunctionalType.silent ) ||
( type == FunctionalType.missense && newType == FunctionalType.nonsense ) ) {
type = newType;
}
} catch ( Exception e ) {} // don't error out if the type isn't supported
}
annotationId++;
} while (eval.hasAttribute(key));
} else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName() ) ) {
SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName()).toString());
if ( snpEffType == SnpEff.EffectType.STOP_GAINED )
type = FunctionalType.nonsense;
else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING )
type = FunctionalType.missense;
else if ( snpEffType == SnpEff.EffectType.SYNONYMOUS_CODING )
type = FunctionalType.silent;
}
if (type != null) {
if (type.equals("silent")) { relevantStates.add("silent"); }
else if (type.equals("missense")) { relevantStates.add("missense"); }
else if (type.equals("nonsense")) { relevantStates.add("nonsense"); }
if ( type != null ) {
relevantStates.add(type.name());
}
}

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 3/12/11
*/
/**
 * Wraps a single input ROD binding for the VariantRecalibrator together with the
 * properties parsed from its command-line tags (known / training / bad / truth /
 * consensus / prior), and logs which properties were detected.
 *
 * User: rpoplin
 * Date: 3/12/11
 */
public class TrainingSet {

    public RodBinding<VariantContext> rodBinding;
    public boolean isKnown = false;
    public boolean isTraining = false;
    public boolean isAntiTraining = false;
    public boolean isTruth = false;
    public boolean isConsensus = false;
    public double prior = 0.0;

    protected final static Logger logger = Logger.getLogger(TrainingSet.class);

    /**
     * @param rodBinding the input track whose tags determine how this set is used
     */
    public TrainingSet( final RodBinding<VariantContext> rodBinding) {
        this.rodBinding = rodBinding;
        final Tags bindingTags = rodBinding.getTags();
        final String trackName = rodBinding.getName();

        // Translate the key=value tags attached to this binding into the boolean properties above
        if( bindingTags != null ) {
            isKnown        = hasTrueTag( bindingTags, "known" );
            isTraining     = hasTrueTag( bindingTags, "training" );
            isAntiTraining = hasTrueTag( bindingTags, "bad" );
            isTruth        = hasTrueTag( bindingTags, "truth" );
            isConsensus    = hasTrueTag( bindingTags, "consensus" );
            if( bindingTags.containsKey("prior") ) {
                prior = Double.parseDouble(bindingTags.getValue("prior"));
            }
        }

        // Report back to the user which tracks were found and the properties that were detected
        if( isConsensus ) {
            logger.info( String.format( "Found consensus track: %s", trackName) );
        } else if( isAntiTraining ) {
            logger.info( String.format( "Found bad sites training track: %s", trackName) );
        } else {
            logger.info( String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", trackName, isKnown, isTraining, isTruth, prior) );
        }
    }

    // True iff the tag is present and its value is exactly the string "true"
    private static boolean hasTrueTag( final Tags tags, final String key ) {
        return tags.containsKey(key) && tags.getValue(key).equals("true");
    }
}

View File

@ -51,10 +51,10 @@ public class VariantDataManager {
private ExpandingArrayList<VariantDatum> data;
private final double[] meanVector;
private final double[] varianceVector; // this is really the standard deviation
public final ArrayList<String> annotationKeys;
public final List<String> annotationKeys;
private final VariantRecalibratorArgumentCollection VRAC;
protected final static Logger logger = Logger.getLogger(VariantDataManager.class);
protected final List<TrainingSet> trainingSets;
public VariantDataManager( final List<String> annotationKeys, final VariantRecalibratorArgumentCollection VRAC ) {
this.data = null;
@ -62,6 +62,7 @@ public class VariantDataManager {
this.VRAC = VRAC;
meanVector = new double[this.annotationKeys.size()];
varianceVector = new double[this.annotationKeys.size()];
trainingSets = new ArrayList<TrainingSet>();
}
public void setData( final ExpandingArrayList<VariantDatum> data ) {
@ -104,6 +105,31 @@ public class VariantDataManager {
}
}
/** Registers another input training/truth/known track with this data manager. */
public void addTrainingSet( final TrainingSet trainingSet ) {
    this.trainingSets.add( trainingSet );
}
/** @return true iff at least one registered track was tagged training=true */
public boolean checkHasTrainingSet() {
    boolean found = false;
    for( final TrainingSet ts : trainingSets ) {
        if( ts.isTraining ) {
            found = true;
            break;
        }
    }
    return found;
}
/** @return true iff at least one registered track was tagged truth=true */
public boolean checkHasTruthSet() {
    boolean found = false;
    for( final TrainingSet ts : trainingSets ) {
        if( ts.isTruth ) {
            found = true;
            break;
        }
    }
    return found;
}
/** @return true iff at least one registered track was tagged known=true */
public boolean checkHasKnownSet() {
    boolean found = false;
    for( final TrainingSet ts : trainingSets ) {
        if( ts.isKnown ) {
            found = true;
            break;
        }
    }
    return found;
}
public ExpandingArrayList<VariantDatum> getTrainingData() {
final ExpandingArrayList<VariantDatum> trainingData = new ExpandingArrayList<VariantDatum>();
for( final VariantDatum datum : data ) {
@ -232,57 +258,35 @@ public class VariantDataManager {
return value;
}
public void parseTrainingSets( final RefMetaDataTracker tracker, final GenomeLoc genomeLoc, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC, final HashMap<String, Double> rodToPriorMap,
final List<RodBinding<VariantContext>> training, final List<RodBinding<VariantContext>> truth, final List<RodBinding<VariantContext>> known, final List<RodBinding<VariantContext>> badSites, final List<RodBinding<VariantContext>> resource) {
public void parseTrainingSets( final RefMetaDataTracker tracker, final GenomeLoc genomeLoc, final VariantContext evalVC, final VariantDatum datum, final boolean TRUST_ALL_POLYMORPHIC ) {
datum.isKnown = false;
datum.atTruthSite = false;
datum.atTrainingSite = false;
datum.atAntiTrainingSite = false;
datum.prior = 2.0;
//BUGBUG: need to clean this up
for( final RodBinding<VariantContext> rod : training ) {
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
for( final TrainingSet trainingSet : trainingSets ) {
for( final VariantContext trainVC : tracker.getValues(trainingSet.rodBinding, genomeLoc) ) {
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
datum.atTrainingSite = true;
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
datum.isKnown = datum.isKnown || trainingSet.isKnown;
datum.atTruthSite = datum.atTruthSite || trainingSet.isTruth;
datum.atTrainingSite = datum.atTrainingSite || trainingSet.isTraining;
datum.prior = Math.max( datum.prior, trainingSet.prior );
datum.consensusCount += ( trainingSet.isConsensus ? 1 : 0 );
}
}
}
for( final RodBinding<VariantContext> rod : truth ) {
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
datum.atTruthSite = true;
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
}
}
}
for( final RodBinding<VariantContext> rod : known ) {
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
datum.isKnown = true;
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
}
}
}
for( final RodBinding<VariantContext> rod : resource ) {
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
if( isValidVariant( evalVC, trainVC, TRUST_ALL_POLYMORPHIC ) ) {
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
}
}
}
for( final RodBinding<VariantContext> rod : badSites ) {
for( final VariantContext trainVC : tracker.getValues(rod, genomeLoc) ) {
if( trainVC != null ) {
datum.atAntiTrainingSite = true;
datum.prior = Math.max( datum.prior, (rodToPriorMap.containsKey(rod.getName()) ? rodToPriorMap.get(rod.getName()) : 0.0) );
datum.atAntiTrainingSite = datum.atAntiTrainingSite || trainingSet.isAntiTraining;
}
}
}
}
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic());
}
public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) {
for( final VariantDatum datum : data ) {
RECAL_FILE.println(String.format("%s,%d,%d,%.4f,%s",
@ -290,10 +294,4 @@ public class VariantDataManager {
(datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL")));
}
}
// A training-set variant is usable at this site only if it is unfiltered, actually
// variant, and of the same broad class as the eval variant (SNP vs indel/mixed);
// additionally, unless TRUST_ALL_POLYMORPHIC is set, a record that carries genotypes
// must be demonstrably polymorphic to count.
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic());
}
}

View File

@ -77,16 +77,15 @@ import java.util.*;
* <p>
* A tranches file which shows various metrics of the recalibration callset as a function of making several slices through the data.
*
* <h2>Examples</h2>
* <h2>Example</h2>
* <pre>
* java -Xmx4g -jar GenomeAnalysisTK.jar \
* -T VariantRecalibrator \
* -R reference/human_g1k_v37.fasta \
* -input NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf \
* -truth:prior=15.0 hapmap_3.3.b37.sites.vcf \
* -training:prior=15.0 hapmap_3.3.b37.sites.vcf \
* -training:prior=12.0 1000G_omni2.5.b37.sites.vcf \
* -known:prior=8.0 dbsnp_132.b37.vcf \
* -resource:hapmap,known=false,training=true,truth=true,prior=15.0 hapmap_3.3.b37.sites.vcf \
* -resource:omni,known=false,training=true,truth=false,prior=12.0 1000G_omni2.5.b37.sites.vcf \
* -resource:dbsnp,known=true,training=false,truth=false,prior=8.0 dbsnp_132.b37.vcf \
* -an QD -an HaplotypeScore -an MQRankSum -an ReadPosRankSum -an FS -an MQ \
* -recalFile path/to/output.recal \
* -tranchesFile path/to/output.tranches \
@ -112,34 +111,11 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
public List<RodBinding<VariantContext>> input;
/**
* Input variants which are found to overlap with these training sites are used to build the Gaussian mixture model.
*/
@Input(fullName="training", shortName = "training", doc="A list of training variants used to train the Gaussian mixture model", required=true)
public List<RodBinding<VariantContext>> training;
/**
* When deciding where to set the cutoff in VQSLOD sensitivity to these truth sites is used.
* Typically one might want to say I dropped my threshold until I got back 99% of HapMap sites, for example.
*/
@Input(fullName="truth", shortName = "truth", doc="A list of true variants to be used when deciding the truth sensitivity cut of the final callset", required=true)
public List<RodBinding<VariantContext>> truth;
/**
* The known / novel status of a variant isn't used by the algorithm itself and is only used for reporting / display purposes.
* The output metrics are stratified by known status in order to aid in comparisons with other call sets.
*/
@Input(fullName="known", shortName = "known", doc="A list of known variants to be used for metric comparison purposes", required=false)
public List<RodBinding<VariantContext>> known = Collections.emptyList();
/**
* In addition to using the worst 3% of variants as compared to the Gaussian mixture model, we can also supplement the list
* with a database of known bad variants. Maybe these are loci which are frequently filtered out in many projects (centromere, for example).
*/
@Input(fullName="badSites", shortName = "badSites", doc="A list of known bad variants used to supplement training the negative model", required=false)
public List<RodBinding<VariantContext>> badSites = Collections.emptyList();
/**
* Any set of sites for which you would like to apply a prior probability but for which you don't want to use as training, truth, or known sites.
* Any set of VCF files to use as lists of training, truth, or known sites.
* Training - Input variants which are found to overlap with these training sites are used to build the Gaussian mixture model.
* Truth - When deciding where to set the cutoff in VQSLOD sensitivity to these truth sites is used.
* Known - The known / novel status of a variant isn't used by the algorithm itself and is only used for reporting / display purposes.
* Bad - In addition to using the worst 3% of variants as compared to the Gaussian mixture model, we can also supplement the list with a database of known bad variants.
*/
@Input(fullName="resource", shortName = "resource", doc="A list of sites for which to apply a prior probability of being correct but which aren't used by the algorithm", required=false)
public List<RodBinding<VariantContext>> resource = Collections.emptyList();
@ -205,7 +181,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
private PrintStream tranchesStream;
private final Set<String> ignoreInputFilterSet = new TreeSet<String>();
private final VariantRecalibratorEngine engine = new VariantRecalibratorEngine( VRAC );
private final HashMap<String, Double> rodToPriorMap = new HashMap<String, Double>();
//---------------------------------------------------------------------------------------------------------------
//
@ -227,18 +202,15 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
throw new UserException.CouldNotCreateOutputFile(TRANCHES_FILE, e);
}
final ArrayList<RodBinding<VariantContext>> allInputBindings = new ArrayList<RodBinding<VariantContext>>();
allInputBindings.addAll(truth);
allInputBindings.addAll(training);
allInputBindings.addAll(known);
allInputBindings.addAll(badSites);
allInputBindings.addAll(resource);
for( final RodBinding<VariantContext> rod : allInputBindings ) {
try {
rodToPriorMap.put(rod.getName(), (rod.getTags().containsKey("prior") ? Double.parseDouble(rod.getTags().getValue("prior")) : 0.0) );
} catch( NumberFormatException e ) {
throw new UserException.BadInput("Bad rod binding syntax. Prior key-value tag detected but isn't parsable. Expecting something like -training:prior=12.0 my.set.vcf");
}
for( RodBinding<VariantContext> rod : resource ) {
dataManager.addTrainingSet( new TrainingSet( rod ) );
}
if( !dataManager.checkHasTrainingSet() ) {
throw new UserException.CommandLineException( "No training set found! Please provide sets of known polymorphic loci marked with the training=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
}
if( !dataManager.checkHasTruthSet() ) {
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
}
}
@ -270,7 +242,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
datum.isTransition = datum.isSNP && VariantContextUtils.isTransition(vc);
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC, rodToPriorMap, training, truth, known, badSites, resource ); // BUGBUG: need to clean this up to be a class, not a list of all the rod bindings
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC );
double priorFactor = QualityUtils.qualToProb( datum.prior );
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );

View File

@ -234,16 +234,47 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
if (minimumN > 1 && (vcs.size() - numFilteredRecords < minimumN))
return 0;
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
List<VariantContext> preMergedVCs = new ArrayList<VariantContext>();
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
// iterate over the types so that it's deterministic
for ( VariantContext.Type type : VariantContext.Type.values() ) {
if ( VCsByType.containsKey(type) )
mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
preMergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
}
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
// we have records merged but separated by type. If a particular record is, for example, a SNP but all its alleles are a subset of an existing mixed record,
// we will still merge those records.
if (preMergedVCs.size() > 1) {
for (VariantContext vc1 : preMergedVCs) {
VariantContext newvc = vc1;
boolean merged = false;
for (int k=0; k < mergedVCs.size(); k++) {
VariantContext vc2 = mergedVCs.get(k);
if (VariantContextUtils.allelesAreSubset(vc1,vc2) || VariantContextUtils.allelesAreSubset(vc2,vc1)) {
// all alleles of vc1 are contained in vc2 but they are of a different type (say, vc1 is a SNP, vc2 is complex): try to merge vc1 into vc2
List<VariantContext> vcpair = new ArrayList<VariantContext>();
vcpair.add(vc1);
vcpair.add(vc2);
newvc = VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcpair,
priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC);
mergedVCs.set(k,newvc);
merged = true;
break;
}
}
if (!merged)
mergedVCs.add(vc1);
}
}
else {
mergedVCs = preMergedVCs;
}
for ( VariantContext mergedVC : mergedVCs ) {
// only operate at the start of events
if ( mergedVC == null )

View File

@ -145,10 +145,9 @@ import java.util.*;
* -R ref.fasta \
* -T SelectVariants \
* --variant input.vcf \
* -o output.vcf \
* -SM family.yaml \
* -family NA12891+NA12892=NA12878 \
* -mvq 50
* -mvq 50 \
* -o violations.vcf
*
* Creating a sample of exactly 1000 variants randomly chosen with equal probability from the variant VCF:
* java -Xmx2g -jar GenomeAnalysisTK.jar \
@ -265,17 +264,17 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private File AF_FILE = new File("");
@Hidden
@Argument(fullName="family_structure_file", shortName="familyFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
@Argument(fullName="family_structure_file", shortName="familyFile", doc="use -family unless you know what you're doing", required=false)
private File FAMILY_STRUCTURE_FILE = null;
/**
* String formatted as dad+mom=child where these parameters determine which sample names are examined.
*/
@Argument(fullName="family_structure", shortName="family", doc="Deprecated; use the -SM argument instead", required=false)
@Argument(fullName="family_structure", shortName="family", doc="string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
private String FAMILY_STRUCTURE = "";
/**
* Sample metadata information will be taken from a YAML file (see the -SM argument).
* This activates the mendelian violation module that will select all variants that correspond to a mendelian violation following the rules given by the family structure.
*/
@Argument(fullName="mendelianViolation", shortName="mv", doc="output mendelian violation sites only", required=false)
private Boolean MENDELIAN_VIOLATIONS = false;
@ -306,7 +305,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
@Hidden
@Argument(fullName="outMVFile", shortName="outMVFile", doc="USE YAML FILE INSTEAD (-SM) !!! string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
@Argument(fullName="outMVFile", shortName="outMVFile", doc="", required=false)
private String outMVFile = null;
/* Private class used to store the intermediate variants in the integer random selection process */

View File

@ -56,11 +56,6 @@ import java.util.Set;
* A variant set to filter.
* </p>
*
* <h2>Output</h2>
* <p>
* A filtered VCF.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \

View File

@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.util.*;
/**
* Annotates a validation (from e.g. Sequenom) VCF with QC metrics (HW-equilibrium, % failed probes)
* Annotates a validation (from Sequenom for example) VCF with QC metrics (HW-equilibrium, % failed probes)
*
* <p>
* The Variant Validation Assessor is a tool for vetting/assessing validation data (containing genotypes).
@ -57,7 +57,16 @@ import java.util.*;
*
* <h2>Output</h2>
* <p>
* An annotated VCF.
* An annotated VCF. Additionally, a table like the following will be output:
* <pre>
* Total number of samples assayed: 185
* Total number of records processed: 152
* Number of Hardy-Weinberg violations: 34 (22%)
* Number of no-call violations: 12 (7%)
* Number of homozygous variant violations: 0 (0%)
* Number of records passing all filters: 106 (69%)
* Number of passing records that are polymorphic: 98 (92%)
* </pre>
* </p>
*
* <h2>Examples</h2>

View File

@ -1056,42 +1056,30 @@ public class MathUtils {
}
static public double softMax(final double x, final double y) {
if (Double.isInfinite(x))
return y;
// we need to compute log10(10^x + 10^y)
// By Jacobian logarithm identity, this is equal to
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup
// with integer quantization
if (Double.isInfinite(y))
return x;
// slow exact version:
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
if (y >= x + MAX_JACOBIAN_TOLERANCE)
return y;
if (x >= y + MAX_JACOBIAN_TOLERANCE)
return x;
double diff = x-y;
// OK, so |y-x| < tol: we use the following identity then:
// we need to compute log10(10^x + 10^y)
// By Jacobian logarithm identity, this is equal to
// max(x,y) + log10(1+10^-abs(x-y))
// we compute the second term as a table lookup
// with integer quantization
//double diff = Math.abs(x-y);
double diff = x-y;
double t1 =x;
if (diff<0) { //
t1 = y;
diff= -diff;
}
// t has max(x,y), diff has abs(x-y)
// we have pre-stored correction for 0,0.1,0.2,... 10.0
//int ind = (int)Math.round(diff*INV_JACOBIAN_LOG_TABLE_STEP);
int ind = (int)(diff*INV_JACOBIAN_LOG_TABLE_STEP+0.5);
// gdebug+
//double z =Math.log10(1+Math.pow(10.0,-diff));
//System.out.format("x: %f, y:%f, app: %f, true: %f ind:%d\n",x,y,t2,z,ind);
//gdebug-
return t1+jacobianLogTable[ind];
// return Math.log10(Math.pow(10.0,x) + Math.pow(10.0,y));
}
if (diff > MAX_JACOBIAN_TOLERANCE)
return x;
else if (diff < -MAX_JACOBIAN_TOLERANCE)
return y;
else if (diff >= 0) {
int ind = (int)(diff*INV_JACOBIAN_LOG_TABLE_STEP+0.5);
return x + jacobianLogTable[ind];
}
else {
int ind = (int)(-diff*INV_JACOBIAN_LOG_TABLE_STEP+0.5);
return y + jacobianLogTable[ind];
}
}
public static double phredScaleToProbability (byte q) {
return Math.pow(10,(-q)/10.0);

View File

@ -1,282 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.snpEff;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
/**
* Codec for decoding the output format of the SnpEff variant effect predictor tool
*
* <p>
* This format has 23 tab-delimited fields:
*
* <pre>
* Chromosome
* Position
* Reference
* Change
* Change Type: {SNP, MNP, INS, DEL}
* Zygosity: {Hom, Het}
* Quality
* Coverage
* Warnings
* Gene ID
* Gene Name
* Bio Type
* Transcript ID
* Exon ID
* Exon Rank
* Effect
* Old/New Amino Acid
* Old/New Codon
* Codon Num
* CDS Size
* Codons Around
* Amino Acids Around
* Custom Interval ID
* </pre>
* Note that we treat all except the Chromosome, Position, and Effect fields as optional.
* </p>
*
* <p>
* See also: @see <a href="http://snpeff.sourceforge.net/">SNPEff project page</a>
* </p>
*
* @author David Roazen
* @since 2011
*/
public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec {

    /** Number of tab-delimited columns expected on every SnpEff output line. */
    public static final int EXPECTED_NUMBER_OF_FIELDS = 23;

    /** Regex used to split a data/header line into its columns. */
    public static final String FIELD_DELIMITER_PATTERN = "\\t";

    /** The Effect column packs subfields separated by ',' or ':' (e.g. "EFFECT: extra info"). */
    public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]";

    /** Every valid SnpEff header line begins with this prefix. */
    public static final String HEADER_LINE_START = "# ";

    /** Expected column headings, in order, exactly as emitted by SnpEff. */
    public static final String[] HEADER_FIELD_NAMES = { "Chromo",
                                                        "Position",
                                                        "Reference",
                                                        "Change",
                                                        "Change type",
                                                        "Homozygous",
                                                        "Quality",
                                                        "Coverage",
                                                        "Warnings",
                                                        "Gene_ID",
                                                        "Gene_name",
                                                        "Bio_type",
                                                        "Trancript_ID", // yes, this is how it's spelled in the SnpEff output
                                                        "Exon_ID",
                                                        "Exon_Rank",
                                                        "Effect",
                                                        "old_AA/new_AA",
                                                        "Old_codon/New_codon",
                                                        "Codon_Num(CDS)",
                                                        "CDS_size",
                                                        "Codons around",
                                                        "AAs around",
                                                        "Custom_interval_ID"
                                                      };

    // The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line:
    public static final int[] REQUIRED_FIELDS = { 0, 1, 15 };

    /** Flag prepended to the Effect field when the effect lies within a non-coding gene. */
    public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE";

    /**
     * Decodes just the location of a record. SnpEff lines carry no extra
     * location-only shortcut, so this simply performs a full decode.
     */
    public Feature decodeLoc ( String line ) {
        return decode(line);
    }

    /**
     * Parses one line of SnpEff output into a SnpEffFeature.
     *
     * All fields except Chromo, Position, and Effect are treated as optional;
     * empty optional fields become null in the resulting feature.
     *
     * @param line one tab-delimited line of SnpEff output
     * @return the parsed SnpEffFeature
     * @throws TribbleException.InvalidDecodeLine if the line has the wrong number of fields,
     *         is missing a required field, or contains an unparseable value
     */
    public Feature decode ( String line ) {
        String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1);

        if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
            throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS +
                                                         ") number of fields: found " + tokens.length + " fields.", line);
        }

        try {
            trimAllFields(tokens);
            checkForRequiredFields(tokens, line);

            String contig = tokens[0];
            long position = Long.parseLong(tokens[1]);
            String reference = tokens[2].isEmpty() ? null : tokens[2];
            String change = tokens[3].isEmpty() ? null : tokens[3];
            ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]);
            Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]);
            Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]);
            Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]);
            String warnings = tokens[8].isEmpty() ? null : tokens[8];
            String geneID = tokens[9].isEmpty() ? null : tokens[9];
            String geneName = tokens[10].isEmpty() ? null : tokens[10];
            String bioType = tokens[11].isEmpty() ? null : tokens[11];
            String transcriptID = tokens[12].isEmpty() ? null : tokens[12];
            String exonID = tokens[13].isEmpty() ? null : tokens[13];
            Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]);

            boolean isNonCodingGene = isNonCodingGene(tokens[15]);

            // Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present,
            // otherwise split it into two subfields. We need this limit to prevent the extra effect-related information
            // in the final field (when present) from being inappropriately tokenized:
            int effectFieldTokenLimit = isNonCodingGene ? 3 : 2;
            String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit);

            EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene);
            String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene);

            String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16];
            String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17];
            Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]);
            Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]);
            String codonsAround = tokens[20].isEmpty() ? null : tokens[20];
            String aasAround = tokens[21].isEmpty() ? null : tokens[21];
            String customIntervalID = tokens[22].isEmpty() ? null : tokens[22];

            return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage,
                                     warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene,
                                     effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize,
                                     codonsAround, aasAround, customIntervalID);
        }
        // NOTE: NumberFormatException is a subclass of IllegalArgumentException, so this
        // more-specific catch MUST come first to produce the numeric-field error message:
        catch ( NumberFormatException e ) {
            throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line);
        }
        // Covers, among other things, Enum.valueOf() failures on the ChangeType/Zygosity/Effect fields:
        catch ( IllegalArgumentException e ) {
            throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line);
        }
    }

    /** Strips leading/trailing whitespace from every field, in place. */
    private void trimAllFields ( String[] tokens ) {
        for ( int i = 0; i < tokens.length; i++ ) {
            tokens[i] = tokens[i].trim();
        }
    }

    /** Verifies that all REQUIRED_FIELDS are non-empty, throwing an InvalidDecodeLine otherwise. */
    private void checkForRequiredFields ( String[] tokens, String line ) {
        for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
            if ( tokens[requiredFieldIndex].isEmpty() ) {
                throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" +
                                                             HEADER_FIELD_NAMES[requiredFieldIndex] + "\"",
                                                             line);
            }
        }
    }

    /** Returns true if the effect field is flagged as lying within a non-coding gene. */
    private boolean isNonCodingGene ( String effectField ) {
        return effectField.startsWith(NON_CODING_GENE_FLAG);
    }

    /**
     * Extracts the EffectType from the tokenized effect field.
     * If there's a WITHIN_NON_CODING_GENE flag, the effect name will be in the second subfield,
     * otherwise it will be in the first subfield.
     */
    private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) {
        String effectName = "";

        if ( effectFieldTokens.length > 1 && isNonCodingGene ) {
            effectName = effectFieldTokens[1].trim();
        }
        else {
            effectName = effectFieldTokens[0].trim();
        }

        return EffectType.valueOf(effectName);
    }

    /**
     * Returns the extra effect-related information (always the last subfield when present),
     * or null if the effect field carried no extra information.
     */
    private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
        if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
            return effectFieldTokens[effectFieldTokens.length - 1].trim();
        }

        return null;
    }

    public Class<SnpEffFeature> getFeatureType() {
        return SnpEffFeature.class;
    }

    /**
     * Reads and validates the single header line from the input.
     *
     * @param reader reader positioned at the start of the file
     * @return the validated header line
     * @throws TribbleException if the header cannot be read or is malformed
     */
    public Object readHeader ( LineReader reader ) {
        String headerLine = "";

        try {
            headerLine = reader.readLine();
        }
        catch ( IOException e ) {
            throw new TribbleException("Unable to read header line from input file.");
        }

        validateHeaderLine(headerLine);

        return headerLine;
    }

    /** Checks the header prefix, column count, and each column heading against HEADER_FIELD_NAMES. */
    private void validateHeaderLine ( String headerLine ) {
        if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) {
            throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START);
        }

        String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN);

        if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
            throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" +
                                                     EXPECTED_NUMBER_OF_FIELDS + ") of columns.");
        }

        for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) {
            if ( ! HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) {
                throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" +
                                                         HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" +
                                                         headerTokens[columnIndex] + "\"");
            }
        }
    }

    /**
     * Probes whether this codec can decode the given file by attempting to read
     * and validate its header. Any failure (including I/O errors) means "no".
     *
     * FIX: the original opened a FileInputStream and never closed it, leaking a
     * file descriptor on every probe; the stream is now closed in a finally block.
     */
    public boolean canDecode ( final File potentialInput ) {
        FileInputStream inputStream = null;
        try {
            inputStream = new FileInputStream(potentialInput);
            readHeader(new AsciiLineReader(inputStream));
            return true;
        }
        catch ( Exception e ) {
            return false;
        }
        finally {
            if ( inputStream != null ) {
                try {
                    inputStream.close();
                }
                catch ( IOException e ) {
                    // ignore: we only opened the stream to probe the header
                }
            }
        }
    }
}

View File

@ -1,115 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.snpEff;
/**
* A set of constants associated with the SnpEff codec.
*
* @author David Roazen
*/
/**
 * Container for the enumerated constants used by the SnpEff codec: the set of
 * biological effect types (each with an associated impact severity), the kinds
 * of sequence change, and the possible zygosities.
 */
public class SnpEffConstants {

    /**
     * Biological effects reported by SnpEff. Every effect carries an
     * {@link EffectImpact} severity used to rank effects against each other.
     */
    public enum EffectType {

        // High-impact effects:
        START_GAINED(EffectImpact.HIGH),
        START_LOST(EffectImpact.HIGH),
        EXON_DELETED(EffectImpact.HIGH),
        FRAME_SHIFT(EffectImpact.HIGH),
        STOP_GAINED(EffectImpact.HIGH),
        STOP_LOST(EffectImpact.HIGH),
        SPLICE_SITE_ACCEPTOR(EffectImpact.HIGH),
        SPLICE_SITE_DONOR(EffectImpact.HIGH),

        // Moderate-impact effects:
        NON_SYNONYMOUS_CODING(EffectImpact.MODERATE),
        UTR_5_DELETED(EffectImpact.MODERATE),
        UTR_3_DELETED(EffectImpact.MODERATE),
        CODON_INSERTION(EffectImpact.MODERATE),
        CODON_CHANGE_PLUS_CODON_INSERTION(EffectImpact.MODERATE),
        CODON_DELETION(EffectImpact.MODERATE),
        CODON_CHANGE_PLUS_CODON_DELETION(EffectImpact.MODERATE),

        // Low-impact effects:
        NONE(EffectImpact.LOW),
        CHROMOSOME(EffectImpact.LOW),
        INTERGENIC(EffectImpact.LOW),
        UPSTREAM(EffectImpact.LOW),
        UTR_5_PRIME(EffectImpact.LOW),
        SYNONYMOUS_START(EffectImpact.LOW),
        NON_SYNONYMOUS_START(EffectImpact.LOW),
        CDS(EffectImpact.LOW),
        GENE(EffectImpact.LOW),
        TRANSCRIPT(EffectImpact.LOW),
        EXON(EffectImpact.LOW),
        SYNONYMOUS_CODING(EffectImpact.LOW),
        CODON_CHANGE(EffectImpact.LOW),
        SYNONYMOUS_STOP(EffectImpact.LOW),
        NON_SYNONYMOUS_STOP(EffectImpact.LOW),
        INTRON(EffectImpact.LOW),
        UTR_3_PRIME(EffectImpact.LOW),
        DOWNSTREAM(EffectImpact.LOW),
        INTRON_CONSERVED(EffectImpact.LOW),
        INTERGENIC_CONSERVED(EffectImpact.LOW),
        CUSTOM(EffectImpact.LOW);

        // Severity of this effect, fixed at construction:
        private final EffectImpact impact;

        EffectType ( EffectImpact effectImpact ) {
            this.impact = effectImpact;
        }

        /** @return the impact severity associated with this effect */
        public EffectImpact getImpact() {
            return impact;
        }
    }

    /**
     * Severity levels for SnpEff effects, ordered by an integer rating
     * so that impacts can be compared.
     */
    public enum EffectImpact {
        LOW(1),
        MODERATE(2),
        HIGH(3);

        // Numeric severity used only for ordering comparisons:
        private final int severityRating;

        EffectImpact ( int rating ) {
            this.severityRating = rating;
        }

        /** @return true if this impact is strictly more severe than {@code other} */
        public boolean isHigherImpactThan ( EffectImpact other ) {
            return severityRating > other.severityRating;
        }
    }

    /** The kinds of variants supported by the SnpEff output format. */
    public enum ChangeType {
        SNP,
        MNP,
        INS,
        DEL
    }

    /** Possible zygosities of SnpEff variants. */
    public enum Zygosity {
        Hom,
        Het
    }
}

View File

@ -1,423 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.snpEff;
import org.broad.tribble.Feature;
import java.util.NoSuchElementException;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
/**
* Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output.
*
* Many fields are optional, and missing values are represented by nulls. You should always call the
* hasX() method before calling the corresponding getX() method. Required fields can never be null
* and do not have a hasX() method.
*
* @author David Roazen
*/
public class SnpEffFeature implements Feature {

    // Required fields -- guaranteed non-null / always meaningful:
    private final String contig;
    private final long position;

    // Optional fields -- null when absent from the SnpEff output line:
    private final String reference;
    private final String change;
    private final ChangeType changeType;
    private final Zygosity zygosity;
    private final Double quality;
    private final Long coverage;
    private final String warnings;
    private final String geneID;
    private final String geneName;
    private final String bioType;
    private final String transcriptID;
    private final String exonID;
    private final Integer exonRank;

    private final boolean isNonCodingGene;   // REQUIRED FIELD
    private final EffectType effect;         // REQUIRED FIELD

    private final String effectExtraInformation;
    private final String oldAndNewAA;
    private final String oldAndNewCodon;
    private final Integer codonNum;
    private final Integer cdsSize;
    private final String codonsAround;
    private final String aasAround;
    private final String customIntervalID;

    /**
     * Constructs a feature from the parsed field values of one SnpEff output line.
     * Optional fields may be null; contig and effect are required.
     *
     * @throws IllegalArgumentException if contig or effect is null
     */
    public SnpEffFeature ( String contig,
                           long position,
                           String reference,
                           String change,
                           ChangeType changeType,
                           Zygosity zygosity,
                           Double quality,
                           Long coverage,
                           String warnings,
                           String geneID,
                           String geneName,
                           String bioType,
                           String transcriptID,
                           String exonID,
                           Integer exonRank,
                           boolean isNonCodingGene,
                           EffectType effect,
                           String effectExtraInformation,
                           String oldAndNewAA,
                           String oldAndNewCodon,
                           Integer codonNum,
                           Integer cdsSize,
                           String codonsAround,
                           String aasAround,
                           String customIntervalID ) {

        if ( contig == null || effect == null ) {
            throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields");
        }

        this.contig = contig;
        this.position = position;
        this.reference = reference;
        this.change = change;
        this.changeType = changeType;
        this.zygosity = zygosity;
        this.quality = quality;
        this.coverage = coverage;
        this.warnings = warnings;
        this.geneID = geneID;
        this.geneName = geneName;
        this.bioType = bioType;
        this.transcriptID = transcriptID;
        this.exonID = exonID;
        this.exonRank = exonRank;
        this.isNonCodingGene = isNonCodingGene;
        this.effect = effect;
        this.effectExtraInformation = effectExtraInformation;
        this.oldAndNewAA = oldAndNewAA;
        this.oldAndNewCodon = oldAndNewCodon;
        this.codonNum = codonNum;
        this.cdsSize = cdsSize;
        this.codonsAround = codonsAround;
        this.aasAround = aasAround;
        this.customIntervalID = customIntervalID;
    }

    /**
     * Compares the impact of this feature's effect to another feature's effect.
     *
     * @return true if this feature's effect has strictly higher impact than the other's
     */
    public boolean isHigherImpactThan ( SnpEffFeature other ) {
        // If one effect is in a non-coding gene and the other is not, the effect NOT in the
        // non-coding gene has higher impact:
        if ( ! isNonCodingGene() && other.isNonCodingGene() ) {
            return true;
        }
        else if ( isNonCodingGene() && ! other.isNonCodingGene() ) {
            return false;
        }

        // Otherwise, both effects are either in or not in a non-coding gene, so we compare the impacts
        // of the effects themselves as defined in the SnpEffConstants class:
        return getEffectImpact().isHigherImpactThan(other.getEffectImpact());
    }

    public String getChr() {
        return contig;
    }

    // NOTE: the narrowing cast assumes positions fit in an int, per the Feature interface.
    public int getStart() {
        return (int)position;
    }

    public int getEnd() {
        return (int)position;
    }

    // --- Optional-field accessors ----------------------------------------------------------
    // Each optional field has a hasX() guard; the corresponding getX() throws
    // NoSuchElementException if the field is absent. Callers should always check hasX() first.

    public boolean hasReference() {
        return reference != null;
    }

    public String getReference() {
        if ( reference == null ) throw new NoSuchElementException("This feature has no reference field");
        return reference;
    }

    public boolean hasChange() {
        return change != null;
    }

    public String getChange() {
        if ( change == null ) throw new NoSuchElementException("This feature has no change field");
        return change;
    }

    public boolean hasChangeType() {
        return changeType != null;
    }

    public ChangeType getChangeType() {
        if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field");
        return changeType;
    }

    public boolean hasZygosity() {
        return zygosity != null;
    }

    public Zygosity getZygosity() {
        if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field");
        return zygosity;
    }

    public boolean hasQuality() {
        return quality != null;
    }

    public Double getQuality() {
        if ( quality == null ) throw new NoSuchElementException("This feature has no quality field");
        return quality;
    }

    public boolean hasCoverage() {
        return coverage != null;
    }

    public Long getCoverage() {
        if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field");
        return coverage;
    }

    public boolean hasWarnings() {
        return warnings != null;
    }

    public String getWarnings() {
        if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field");
        return warnings;
    }

    public boolean hasGeneID() {
        return geneID != null;
    }

    public String getGeneID() {
        if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field");
        return geneID;
    }

    public boolean hasGeneName() {
        return geneName != null;
    }

    public String getGeneName() {
        if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field");
        return geneName;
    }

    public boolean hasBioType() {
        return bioType != null;
    }

    public String getBioType() {
        if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field");
        return bioType;
    }

    public boolean hasTranscriptID() {
        return transcriptID != null;
    }

    public String getTranscriptID() {
        if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field");
        return transcriptID;
    }

    public boolean hasExonID() {
        return exonID != null;
    }

    public String getExonID() {
        if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field");
        return exonID;
    }

    public boolean hasExonRank() {
        return exonRank != null;
    }

    public Integer getExonRank() {
        if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field");
        return exonRank;
    }

    public boolean isNonCodingGene() {
        return isNonCodingGene;
    }

    public EffectType getEffect() {
        return effect;
    }

    public EffectImpact getEffectImpact() {
        return effect.getImpact();
    }

    public boolean hasEffectExtraInformation() {
        return effectExtraInformation != null;
    }

    public String getEffectExtraInformation() {
        if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field");
        return effectExtraInformation;
    }

    public boolean hasOldAndNewAA() {
        return oldAndNewAA != null;
    }

    public String getOldAndNewAA() {
        if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field");
        return oldAndNewAA;
    }

    public boolean hasOldAndNewCodon() {
        return oldAndNewCodon != null;
    }

    public String getOldAndNewCodon() {
        if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field");
        return oldAndNewCodon;
    }

    public boolean hasCodonNum() {
        return codonNum != null;
    }

    public Integer getCodonNum() {
        if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field");
        return codonNum;
    }

    public boolean hasCdsSize() {
        return cdsSize != null;
    }

    public Integer getCdsSize() {
        if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field");
        return cdsSize;
    }

    public boolean hasCodonsAround() {
        return codonsAround != null;
    }

    public String getCodonsAround() {
        if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field");
        return codonsAround;
    }

    /**
     * @deprecated misspelled; retained for backward compatibility -- use {@link #hasAasAround()}
     */
    @Deprecated
    public boolean hadAasAround() {
        return aasAround != null;
    }

    /** Correctly-spelled replacement for the misspelled hadAasAround(). */
    public boolean hasAasAround() {
        return aasAround != null;
    }

    public String getAasAround() {
        if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field");
        return aasAround;
    }

    public boolean hasCustomIntervalID() {
        return customIntervalID != null;
    }

    public String getCustomIntervalID() {
        if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field");
        return customIntervalID;
    }

    /** Null-safe equality helper for the optional fields. */
    private static boolean fieldsEqual ( Object a, Object b ) {
        return a == null ? b == null : a.equals(b);
    }

    @Override
    public boolean equals ( Object o ) {
        if ( ! (o instanceof SnpEffFeature) ) {
            return false;
        }

        SnpEffFeature other = (SnpEffFeature)o;

        return contig.equals(other.contig) &&
               position == other.position &&
               fieldsEqual(reference, other.reference) &&
               fieldsEqual(change, other.change) &&
               changeType == other.changeType &&
               zygosity == other.zygosity &&
               fieldsEqual(quality, other.quality) &&
               fieldsEqual(coverage, other.coverage) &&
               fieldsEqual(warnings, other.warnings) &&
               fieldsEqual(geneID, other.geneID) &&
               fieldsEqual(geneName, other.geneName) &&
               fieldsEqual(bioType, other.bioType) &&
               fieldsEqual(transcriptID, other.transcriptID) &&
               fieldsEqual(exonID, other.exonID) &&
               fieldsEqual(exonRank, other.exonRank) &&
               isNonCodingGene == other.isNonCodingGene &&
               effect == other.effect &&
               fieldsEqual(effectExtraInformation, other.effectExtraInformation) &&
               fieldsEqual(oldAndNewAA, other.oldAndNewAA) &&
               fieldsEqual(oldAndNewCodon, other.oldAndNewCodon) &&
               fieldsEqual(codonNum, other.codonNum) &&
               fieldsEqual(cdsSize, other.cdsSize) &&
               fieldsEqual(codonsAround, other.codonsAround) &&
               fieldsEqual(aasAround, other.aasAround) &&
               fieldsEqual(customIntervalID, other.customIntervalID);
    }

    /**
     * FIX: the original class overrode equals() without hashCode(), which breaks
     * hash-based collections (HashSet/HashMap). This hash uses the required fields
     * only -- a subset of the fields compared by equals() -- which preserves the
     * equals/hashCode contract (equal objects always hash equally).
     */
    @Override
    public int hashCode() {
        int result = contig.hashCode();
        result = 31 * result + (int)(position ^ (position >>> 32));
        result = 31 * result + effect.hashCode();
        return result;
    }

    @Override
    public String toString() {
        return "[Contig: " + contig +
               " Position: " + position +
               " Reference: " + reference +
               " Change: " + change +
               " Change Type: " + changeType +
               " Zygosity: " + zygosity +
               " Quality: " + quality +
               " Coverage: " + coverage +
               " Warnings: " + warnings +
               " Gene ID: " + geneID +
               " Gene Name: " + geneName +
               " Bio Type: " + bioType +
               " Transcript ID: " + transcriptID +
               " Exon ID: " + exonID +
               " Exon Rank: " + exonRank +
               " Non-Coding Gene: " + isNonCodingGene +
               " Effect: " + effect +
               " Effect Extra Information: " + effectExtraInformation +
               " Old/New AA: " + oldAndNewAA +
               " Old/New Codon: " + oldAndNewCodon +
               " Codon Num: " + codonNum +
               " CDS Size: " + cdsSize +
               " Codons Around: " + codonsAround +
               " AAs Around: " + aasAround +
               " Custom Interval ID: " + customIntervalID +
               "]";
    }
}

View File

@ -24,6 +24,7 @@ public class VCFHeader {
private final Set<VCFHeaderLine> mMetaData;
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
private final Map<String, VCFHeaderLine> mOtherMetaData = new HashMap<String, VCFHeaderLine>();
// the list of auxillary tags
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
@ -110,6 +111,9 @@ public class VCFHeader {
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
mFormatMetaData.put(formatLine.getName(), formatLine);
}
else {
mOtherMetaData.put(line.getKey(), line);
}
}
}
@ -185,6 +189,14 @@ public class VCFHeader {
public VCFFormatHeaderLine getFormatHeaderLine(String key) {
return mFormatMetaData.get(key);
}
/**
 * Looks up a non-INFO, non-FORMAT meta-data line (stored in mOtherMetaData)
 * by its key.
 *
 * @param key the header key name
 * @return the meta data line, or null if there is none for this key
 */
public VCFHeaderLine getOtherHeaderLine(String key) {
    return mOtherMetaData.get(key);
}
}

View File

@ -817,6 +817,28 @@ public class VariantContext implements Feature { // to enable tribble intergrati
throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this);
}
/**
 * @param other VariantContext whose alternate alleles to compare against
 * @return true if this VariantContext has the same alternate alleles as other,
 *         regardless of ordering; false otherwise
 */
public boolean hasSameAlternateAllelesAs ( VariantContext other ) {
    final Set<Allele> mine = getAlternateAlleles();
    final Set<Allele> theirs = other.getAlternateAlleles();

    // Equal cardinality plus one-directional containment implies set equality:
    return mine.size() == theirs.size() && theirs.containsAll(mine);
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with genotypes
@ -1085,14 +1107,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
public void validateReferenceBases(Allele reference, Byte paddedRefBase) {
// don't validate if we're an insertion or complex event
if ( !reference.isNull() && getReference().length() == 1 && !reference.basesMatch(getReference()) ) {
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
// don't validate if we're a complex event
if ( !isComplexIndel() && !reference.isNull() && !reference.basesMatch(getReference()) ) {
throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString()));
}
// we also need to validate the padding base for simple indels
if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) )
throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), (char)getReferenceBaseForIndel().byteValue(), (char)paddedRefBase.byteValue()));
if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) {
throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, fasta says %s vs. VCF says %s", getChr(), getStart(), (char)paddedRefBase.byteValue(), (char)getReferenceBaseForIndel().byteValue()));
}
}
public void validateRSIDs(Set<String> rsIDs) {

View File

@ -663,6 +663,18 @@ public class VariantContextUtils {
return merged;
}
/**
 * Tests whether vc1's alleles form a subset of vc2's: the reference alleles
 * must match exactly, and every alternate allele of vc1 must appear among
 * vc2's alternate alleles.
 *
 * @return true if all alleles of vc1 are contained in the alleles of vc2
 */
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
    if ( !vc1.getReference().equals(vc2.getReference()) )
        return false;

    return vc2.getAlternateAlleles().containsAll(vc1.getAlternateAlleles());
}
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
// see if we need to trim common reference base from all alleles
boolean trimVC;

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.SnpEffEffect;
public class SnpEffUnitTest {
@Test
public void testParseWellFormedEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( effect.isWellFormed() && effect.isCoding() );
}
@Test
public void testParseInvalidEffectNameEffect() {
String effectName = "MADE_UP_EFFECT";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
public void testParseInvalidEffectImpactEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
public void testParseWrongNumberOfMetadataFieldsEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
public void testParseSnpEffWarningEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") );
}
@Test
public void testParseSnpEffErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") );
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import java.util.Arrays;
@ -129,12 +130,24 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testSnpEffAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1000G.exomes.vcf --snpEffFile " + validationDataLocation +
"snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000",
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
1,
Arrays.asList("03eae1dab19a9358250890594bf53607")
Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9")
);
executeTest("Testing SnpEff annotations", spec);
}
@Test
public void testSnpEffAnnotationsUnsupportedVersion() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.unsupported.version.vcf -L 1:1-1,500,000",
1,
UserException.class
);
executeTest("Testing SnpEff annotations (unsupported version)", spec);
}
}

View File

@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
"--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
"--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
"-o %s -NO_HEADER", 1, Arrays.asList("3531451e84208264104040993889aaf4"));
"-o %s -NO_HEADER", 1, Arrays.asList("b445d280fd8fee1eeb4aacb3f5a54847"));
executeTest("test BeagleOutputToVCF", spec);
}
@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
"-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("8dd6ec53994fb46c5c22af8535d22965"));
"-L 20:1-70000 -o %s -NO_HEADER ",1,Arrays.asList("51a57ea565176edd96d907906914b0ee"));
executeTest("testBeagleChangesSitesToRef",spec);
}

View File

@ -18,6 +18,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129;
private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm INDEL --dbsnp " + b36dbSNP129;
private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -NO_HEADER -glm INDEL --dbsnp " + b37dbSNP132;
// --------------------------------------------------------------------------------------------------------------
//
@ -28,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("149e6ad9b3fd23551254a691286a96b3"));
Arrays.asList("e6639ea2dc81635c706e6c35921406d7"));
executeTest("test MultiSample Pilot1", spec);
}
@ -49,7 +50,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testSingleSamplePilot2() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("82d469145c174486ccc494884852cc58"));
Arrays.asList("d1cbd1fb9f3f7323941a95bc2def7e5a"));
executeTest("test SingleSample Pilot2", spec);
}
@ -59,7 +60,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
private final static String COMPRESSED_OUTPUT_MD5 = "a5a9f38c645d6004d4640765a8b77ce4";
private final static String COMPRESSED_OUTPUT_MD5 = "2732b169cdccb21eb3ea00429619de79";
@Test
public void testCompressedOutput() {
@ -80,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
String md5 = "0a45761c0e557d9c2080eb9e7f4f6c41";
String md5 = "cbac3960bbcb9d6192c57549208c182c";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
@ -159,8 +160,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "af5199fbc0853cf5888acdcc88f012bc" );
e.put( 1.0 / 1850, "4e6938645ccde1fdf204ffbf4e88170f" );
e.put( 0.01, "aed69402ddffe7f2ed5ca98563bfba02" );
e.put( 1.0 / 1850, "fa94a059f08c1821b721335d93ed2ea5" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -184,7 +185,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("213ebaaaacf850312d885e918eb33500"));
Arrays.asList("1c080e6596d4c830bb5d147b04e2a82c"));
executeTest(String.format("test multiple technologies"), spec);
}
@ -203,7 +204,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -L 1:10,000,000-10,100,000" +
" -baq CALCULATE_AS_NECESSARY",
1,
Arrays.asList("3aecba34a89f3525afa57a38dc20e6cd"));
Arrays.asList("9129ad748ca3be2d3b321d2d7e83ae5b"));
executeTest(String.format("test calling with BAQ"), spec);
}
@ -222,7 +223,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("043973c719a85de29a35a33a674616fb"));
Arrays.asList("0bece77ce6bc447438ef9b2921b2dc41"));
executeTest(String.format("test indel caller in SLX"), spec);
}
@ -237,7 +238,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -minIndelCnt 1" +
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("68d4e6c1849e892467aed61c33e7bf24"));
Arrays.asList("5fe98ee853586dc9db58f0bc97daea63"));
executeTest(String.format("test indel caller in SLX witn low min allele count"), spec);
}
@ -250,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -o %s" +
" -L 1:10,000,000-10,500,000",
1,
Arrays.asList("f86d453c5d2d2f33fb28ae2050658a5e"));
Arrays.asList("790b1a1d6ab79eee8c24812bb8ca6fae"));
executeTest(String.format("test indel calling, multiple technologies"), spec);
}
@ -276,7 +277,14 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
Arrays.asList("e66b7321e2ac91742ad3ef91040daafd"));
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("4be308fd9e8167ebee677f62a7a753b7"));
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
}
}

View File

@ -6,7 +6,7 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval";
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf";
@ -14,6 +14,27 @@ public class VariantEvalIntegrationTest extends WalkerTest {
private static String cmdRoot = "-T VariantEval" +
" -R " + b36KGReference;
@Test
public void testFunctionClassWithSnpeff() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
"-noEV",
"-EV TiTvVariantEvaluator",
"-noST",
"-ST FunctionalClass",
"-BTI eval",
"-o %s"
),
1,
Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12")
);
executeTest("testFunctionClassWithSnpeff", spec);
}
@Test
public void testStratifySamplesAndExcludeMonomorphicSites() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -256,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("2df4f8911ffc3c8d042298723ed465f8"));
1, Arrays.asList("f70997b6a3e7fdc89d11e1d61a2463d4"));
executeTestParallel("testSelect1", spec);
}
@ -273,7 +294,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ed54aa127b173d8ad8b6482f2a929a42"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("407682de41dcf139ea635e9cda21b912"));
executeTestParallel("testCompVsEvalAC",spec);
}
@ -291,7 +312,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testCompOverlap() {
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("462d4784dd55294ef9d5118217b157a5"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("009ecc8376a20dce81ff5299ef6bfecb"));
executeTestParallel("testCompOverlap",spec);
}
@ -303,7 +324,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --dbsnp " + b37dbSNP132 +
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("18c44636e36d6657110bf984f8eac181"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("424c9d438b1faa59b2c29413ba32f37b"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -315,7 +336,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("1b8ae4fd10de0888bd843f833859d990"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("18fa0b89ebfff51141975d7e4ce7a159"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -332,13 +353,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -noST -noEV -ST Novelty -EV CompOverlap" +
" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("a3c2177849cb00fdff99574cff7f0e4f"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("0b81d97f843ec4a1a4222d1f9949bfca"));
executeTestParallel("testMultipleCompTracks",spec);
}
@Test
public void testPerSampleAndSubsettedSampleHaveSameResults() {
String md5 = "dab415cc76846e18fcf8c78f2b2ee033";
String md5 = "b0565ac61b2860248e4abd478a177b5e";
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(

View File

@ -41,11 +41,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b37KGReference +
" -known:prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
" -training:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
" -truth:prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
" -training:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
" -truth:prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
" -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
" -resource:truth=true,training=true,prior=15.0 " + comparisonDataLocation + "Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" +
" -resource:training=true,truth=true,prior=12.0 " + comparisonDataLocation + "Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" +
" -T VariantRecalibrator" +
" -input " + params.inVCF +
" -L 20:1,000,000-40,000,000" +
@ -89,9 +87,8 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
//System.out.printf("PARAMS FOR %s is %s%n", vcf, clusterFile);
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b37KGReference +
" -known:prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
" -training:prior=15.0 " + comparisonDataLocation + "Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf" +
" -truth:prior=15.0 " + comparisonDataLocation + "Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf" +
" -resource:known=true,prior=10.0 " + GATKDataLocation + "dbsnp_132_b37.leftAligned.vcf" +
" -resource:training=true,truth=true,prior=15.0 " + comparisonDataLocation + "Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf" +
" -T VariantRecalibrator" +
" -input " + params.inVCF +
" -L 20:1,000,000-40,000,000" +

View File

@ -90,7 +90,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void combineTrioCalls() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", "", "1d5a021387a8a86554db45a29f66140f"); } // official project VCF files in tabix format
@Test public void combineTrioCallsMin() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "YRI.trio.2010_03.genotypes.vcf.gz", " -minimalVCF", "20163d60f18a46496f6da744ab5cc0f9"); } // official project VCF files in tabix format
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "f1cf095c2fe9641b7ca1f8ee2c46fd4a"); }
@Test public void combine2Indels() { combine2("CEU.dindel.vcf4.trio.2010_06.indel.genotypes.vcf", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "312a22aedb088b678bc891f1a1b03c91"); }
@Test public void combineSNPsAndIndels() { combine2("CEU.trio.2010_03.genotypes.vcf.gz", "CEU.dindel.vcf4.low_coverage.2010_06.indel.genotypes.vcf", "", "e144b6283765494bfe8189ac59965083"); }
@ -110,7 +110,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
" -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
" -genotypeMergeOptions UNIQUIFY -L 1"),
1,
Arrays.asList("1de95f91ca15d2a8856de35dee0ce33e"));
Arrays.asList("35acb0f15f9cd18c653ede4e15e365c9"));
executeTest("threeWayWithRefs", spec);
}

View File

@ -98,7 +98,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
" -EV CompOverlap -noEV -noST" +
" -o %s",
1,
Arrays.asList("ea09bf764adba9765b99921c5ba2c709")
Arrays.asList("d46a735ffa898f4aa6b3758c5b03f06d")
);
executeTest("testVCFStreamingChain", selectTestSpec);

View File

@ -113,4 +113,16 @@ public class ValidateVariantsIntegrationTest extends WalkerTest {
executeTest("test bad alt allele", spec);
}
@Test
public void testBadAllele2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("validationExampleBad3.vcf", "REF"),
0,
UserException.MalformedFile.class
);
executeTest("test bad ref allele in deletion", spec);
}
}

View File

@ -1,259 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.snpEff;
import org.apache.commons.io.input.ReaderInputStream;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.testng.Assert;
import org.testng.annotations.Test;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
import java.io.StringReader;
public class SnpEffCodecUnitTest {
@Test
public void testParseWellFormedSnpEffHeaderLine() {
String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
"AAs around\tCustom_interval_ID";
SnpEffCodec codec = new SnpEffCodec();
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine)));
String headerReturned = (String)codec.readHeader(reader);
Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine);
}
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
public void testParseWrongNumberOfFieldsSnpEffHeaderLine() {
String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
"AAs around";
SnpEffCodec codec = new SnpEffCodec();
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine)));
codec.readHeader(reader);
}
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
public void testParseMisnamedColumnSnpEffHeaderLine() {
String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" +
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
"AAs around\tCustom_interval_ID";
SnpEffCodec codec = new SnpEffCodec();
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine)));
codec.readHeader(reader);
}
@Test
public void testParseSimpleEffectSnpEffLine() {
String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t";
SnpEffFeature expectedFeature = new SnpEffFeature("1",
69428l,
"T",
"G",
ChangeType.SNP,
Zygosity.Hom,
6049.69,
61573l,
null,
"ENSG00000177693",
"OR4F5",
"mRNA",
"ENST00000326183",
"exon_1_69055_70108",
1,
false,
EffectType.NON_SYNONYMOUS_CODING,
null,
"F/C",
"TTT/TGT",
113,
918,
null,
null,
null
);
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine);
Assert.assertEquals(feature, expectedFeature);
}
@Test
public void testParseNonCodingRegionSnpEffLine() {
String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" +
"RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" +
"L/V\tCTA/GTA\t272\t952\t\t\t";
SnpEffFeature expectedFeature = new SnpEffFeature("1",
1337592l,
"G",
"C",
ChangeType.SNP,
Zygosity.Hom,
1935.52,
21885l,
null,
"ENSG00000250188",
"RP4-758J18.5",
"mRNA",
"ENST00000514958",
"exon_1_1337454_1338076",
2,
true,
EffectType.NON_SYNONYMOUS_CODING,
null,
"L/V",
"CTA/GTA",
272,
952,
null,
null,
null
);
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine);
Assert.assertEquals(feature, expectedFeature);
}
@Test
public void testParseExtraEffectInformationSnpEffLine() {
String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" +
"mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t";
SnpEffFeature expectedFeature = new SnpEffFeature("1",
879537l,
"T",
"C",
ChangeType.SNP,
Zygosity.Hom,
341.58,
13733l,
null,
"ENSG00000187634",
"SAMD11",
"mRNA",
"ENST00000341065",
null,
null,
false,
EffectType.UTR_3_PRIME,
"4 bases from transcript end",
null,
null,
null,
null,
null,
null,
null
);
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine);
Assert.assertEquals(feature, expectedFeature);
}
@Test
public void testParseMultiEffectSnpEffLine() {
String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" +
"ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t";
SnpEffFeature expectedFeature = new SnpEffFeature("1",
901901l,
"C",
"T",
ChangeType.SNP,
Zygosity.Hom,
162.91,
4646l,
null,
"ENSG00000187583",
"PLEKHN1",
"mRNA",
"ENST00000379410",
"exon_1_901877_901994",
1,
false,
EffectType.START_GAINED,
"ATG, UTR_5_PRIME: 11 bases from TSS",
null,
null,
null,
null,
null,
null,
null
);
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine);
Assert.assertEquals(feature, expectedFeature);
}
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
public void testParseWrongNumberOfFieldsSnpEffLine() {
String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t";
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine);
}
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
public void testParseBlankEffectFieldSnpEffLine() {
String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t";
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine);
}
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
public void testParseInvalidNumericFieldSnpEffLine() {
String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";;
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine);
}
}

View File

@ -6,10 +6,7 @@
<dependencies>
<!-- Recalibration analysis script -->
<class name="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.DinucCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.QualityScoreCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.*" />
</dependencies>
</executable>
<resources>

View File

@ -1,10 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="FindContaminatingReadGroups">
<executable name="FindContaminatingReadGroups">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<class name="org.broadinstitute.sting.playground.gatk.walkers.contamination.FindContaminatingReadGroupsWalker" />
</dependencies>
</executable>
</package>

View File

@ -1,20 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="GATKResources">
<resources>
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod" />
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_129_b37.rod" />
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_130_b36.rod" />
<file name="/humgen/1kg/reference/human_b36_both.fasta" />
<file name="/humgen/1kg/reference/human_b36_both.dict" />
<file name="/humgen/1kg/reference/human_b36_both.fasta.fai" />
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_129_hg18.rod" />
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_130_hg18.rod" />
<file name="/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta" />
<file name="/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.dict" />
<file name="/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta.fai" />
<file name="/humgen/gsa-scr1/GATK_Data/dbsnp_130_b37.rod" />
<file name="/humgen/1kg/reference/human_g1k_v37.fasta" />
<file name="/humgen/1kg/reference/human_g1k_v37.dict" />
<file name="/humgen/1kg/reference/human_g1k_v37.fasta.fai" />
</resources>
</package>

View File

@ -1,11 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="IndelGenotyper">
<executable name="IndelGenotyper">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<!-- Local realignment around indels -->
<class name="org.broadinstitute.sting.gatk.walkers.indels.IndelGenotyperV2Walker" />
</dependencies>
</executable>
</package>

View File

@ -1,12 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="LocalRealignmentAroundIndels">
<executable name="LocalRealignmentAroundIndels">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<!-- Local realignment around indels -->
<class name="org.broadinstitute.sting.gatk.walkers.indels.RealignerTargetCreator" />
<class name="org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner" />
</dependencies>
</executable>
</package>

View File

@ -1,18 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="QualityScoresRecalibration">
<executable name="QualityScoresRecalibration">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<!-- Quality scores recalibration -->
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.CovariateCounterWalker" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.TableRecalibrationWalker" />
<!-- Recalibration Covariates -->
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.DinucCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.QualityScoreCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.CycleCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.ReadGroupCovariate" />
<class name="org.broadinstitute.sting.gatk.walkers.recalibration.TileCovariate" />
</dependencies>
</executable>
</package>

View File

@ -1,13 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="RMDIndexer">
<executable name="RMDIndexer">
<main-class name="org.broadinstitute.sting.gatk.refdata.indexer.RMDIndexer" />
<resource-bundle file="StingText.properties" />
<dependencies>
<package name="org.broad.tribble.*" />
<package name="org.broadinstitute.sting.gatk.refdata.features.*" />
<!-- the class itself -->
<class name="org.broadinstitute.sting.gatk.refdata.indexer.RMDIndexer" />
</dependencies>
</executable>
</package>

View File

@ -1,11 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="UnifiedGenotyper">
<executable name="UnifiedGenotyper">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<!-- Unified genotyper -->
<class name="org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper" />
</dependencies>
</executable>
</package>

View File

@ -1,26 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="VariantAnnotator">
<executable name="VariantAnnotator">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<!-- Variant annotator -->
<class name="org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator" />
<!-- The interfacess -->
<class name="org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation" />
<!-- The annotations -->
<class name="org.broadinstitute.sting.gatk.walkers.annotator.DepthPerAlleleBySample" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.DepthOfCoverage" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.HomopolymerRun" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.MappingQualityZero" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.QualByDepth" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.RMSMappingQuality" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.SpanningDeletions" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.HaplotypeScore" />
<class name="org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts" />
</dependencies>
</executable>
</package>

View File

@ -1,18 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="VariantEval">
<executable name="VariantEval">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker" />
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.CountVariants" />
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.CompOverlap" />
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.GenotypeConcordance" />
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.MendelianViolationEvaluator" />
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.TiTvVariantEvaluator" />
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.ValidationRate" />
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.VariantQualityScore" />
<class name="org.broadinstitute.sting.gatk.walkers.varianteval.CountFunctionalClasses" />
</dependencies>
</executable>
</package>

View File

@ -1,13 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="VariantFiltration">
<executable name="VariantFiltration">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<!-- Workaround - depend on the logger impl required by JEXL -->
<package name="org.apache.commons.logging.impl" />
<!-- Variant filtration -->
<class name="org.broadinstitute.sting.gatk.walkers.filters.VariantFiltrationWalker" />
</dependencies>
</executable>
</package>

View File

@ -1,12 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<package name="VariantRecalibration">
<executable name="VariantRecalibration">
<main-class name="org.broadinstitute.sting.gatk.CommandLineGATK" />
<resource-bundle file="StingText.properties" />
<dependencies>
<!-- Variant recalibration -->
<class name="org.broadinstitute.sting.gatk.walkers.variantrecalibration.GenerateVariantClustersWalker" />
<class name="org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator" />
</dependencies>
</executable>
</package>

View File

@ -36,7 +36,7 @@ my $unsorted_vcf = "$tmp_prefix.unsorted.vcf";
# lift over the file
print "Lifting over the vcf...";
my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -B:variant,vcf $in -o $unsorted_vcf -chain $chain -dict $newRef.dict";
my $cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T LiftoverVariants -R $oldRef.fasta -V:variant $in -o $unsorted_vcf -chain $chain -dict $newRef.dict";
if ($recordOriginalLocation) {
$cmd .= " -recordOriginalLocation";
}
@ -66,7 +66,7 @@ system($cmd) == 0 or quit("The sorting step failed. Please correct the necessar
# Filter the VCF for bad records
print "\nFixing/removing bad records...\n";
$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -B:variant,vcf $sorted_vcf -o $out";
$cmd = "java -jar $gatk/dist/GenomeAnalysisTK.jar -T FilterLiftedVariants -R $newRef.fasta -V:variant $sorted_vcf -o $out";
system($cmd) == 0 or quit("The filtering step failed. Please correct the necessary errors before retrying.");
# clean up

View File

@ -22,8 +22,8 @@ class MethodsDevelopmentCallingPipeline extends QScript {
@Argument(shortName="noBAQ", doc="turns off BAQ calculation", required=false)
var noBAQ: Boolean = false
@Argument(shortName="indels", doc="calls indels with the Unified Genotyper", required=false)
var callIndels: Boolean = false
@Argument(shortName="noIndels", doc="do not call indels with the Unified Genotyper", required=false)
var noIndels: Boolean = false
@Argument(shortName="LOCAL_ET", doc="Doesn't use the AWS S3 storage for ET option", required=false)
var LOCAL_ET: Boolean = false
@ -165,7 +165,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
val goldStandard = true
for (target <- targets) {
if( !skipCalling ) {
if (callIndels) add(new indelCall(target), new indelFilter(target), new indelEvaluation(target))
if (!noIndels) add(new indelCall(target), new indelFilter(target), new indelEvaluation(target))
add(new snpCall(target))
add(new VQSR(target, !goldStandard))
add(new applyVQSR(target, !goldStandard))
@ -248,12 +248,10 @@ class MethodsDevelopmentCallingPipeline extends QScript {
this.reference_sequence = t.reference
this.intervalsString ++= List(t.intervals)
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
this.training :+= new TaggedFile( t.hapmapFile, "prior=15.0")
this.truth :+= new TaggedFile( t.hapmapFile, "prior=15.0")
this.training :+= new TaggedFile( omni_b37, "prior=12.0")
this.truth :+= new TaggedFile( omni_b37, "prior=12.0")
this.training :+= new TaggedFile( training_1000G, "prior=10.0" )
this.known :+= new TaggedFile( t.dbsnpFile, "prior=2.0" )
this.resource :+= new TaggedFile( t.hapmapFile, "training=true,truth=true,prior=15.0" )
this.resource :+= new TaggedFile( omni_b37, "training=true,truth=true,prior=12.0" )
this.resource :+= new TaggedFile( training_1000G, "training=true,prior=10.0" )
this.resource :+= new TaggedFile( t.dbsnpFile, "known=true,prior=2.0" )
this.resource :+= new TaggedFile( projectConsensus_1000G, "prior=8.0" )
this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "MQ", "FS")
if(t.nSamples >= 10) {

View File

@ -56,15 +56,15 @@ class ExampleUnifiedGenotyper extends QScript {
genotyper.input_file :+= qscript.bamFile
genotyper.out = swapExt(qscript.bamFile, "bam", "unfiltered.vcf")
evalUnfiltered.rodBind :+= RodBind("eval", "VCF", genotyper.out)
evalUnfiltered.eval :+= genotyper.out
evalUnfiltered.out = swapExt(genotyper.out, "vcf", "eval")
variantFilter.rodBind :+= RodBind("variant", "VCF", genotyper.out)
variantFilter.variant = genotyper.out
variantFilter.out = swapExt(qscript.bamFile, "bam", "filtered.vcf")
variantFilter.filterName = filterNames
variantFilter.filterExpression = filterExpressions.map("\"" + _ + "\"")
evalFiltered.rodBind :+= RodBind("eval", "VCF", variantFilter.out)
evalFiltered.eval :+= variantFilter.out
evalFiltered.out = swapExt(variantFilter.out, "vcf", "eval")
add(genotyper, evalUnfiltered)

View File

@ -156,7 +156,7 @@ class FunctionEdge(val function: QFunction, val inputs: QNode, val outputs: QNod
val maxLines = 100
val tailLines = IOUtils.tail(errorFile, maxLines)
val nl = "%n".format()
val summary = if (tailLines.size <= maxLines) "Last %d lines".format(maxLines) else "Contents"
val summary = if (tailLines.size > maxLines) "Last %d lines".format(maxLines) else "Contents"
logger.error("%s of %s:%n%s".format(summary, errorFile, tailLines.mkString(nl)))
} else {
logger.error("Unable to access log file: %s".format(errorFile))

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import org.broadinstitute.sting.queue.function.FileExtension
import org.broadinstitute.sting.queue.util.FileExtension
import java.lang.String
/**

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.queue.extensions.gatk
import java.io.File
import org.broadinstitute.sting.queue.function.FileExtension
import org.broadinstitute.sting.queue.util.FileExtension
/**
* Used to provide tagged -I input_file arguments to the GATK.

View File

@ -387,25 +387,11 @@ trait QFunction extends Logging with QJobReport {
*/
protected def canon(value: Any) = {
value match {
case fileExtension: FileExtension =>
val newFile = absolute(fileExtension);
val newFileExtension = fileExtension.withPath(newFile.getPath)
newFileExtension
case file: File =>
if (file.getClass != classOf[File])
throw new QException("Extensions of file must also extend with FileExtension so that the path can be modified.");
absolute(file)
case file: File => IOUtils.absolute(commandDirectory, file)
case x => x
}
}
/**
* Returns the absolute path to the file relative to the run directory and the job command directory.
* @param file File to root relative to the command directory if it is not already absolute.
* @return The absolute path to file.
*/
private def absolute(file: File) = IOUtils.absolute(commandDirectory, file)
/**
* Scala sugar type for checking annotation required and exclusiveOf.
*/

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.queue.function
package org.broadinstitute.sting.queue.util
import java.io.File

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.util
import org.apache.commons.io.FileUtils
import java.io.{FileReader, File}
import org.broadinstitute.sting.utils.exceptions.UserException
import org.broadinstitute.sting.queue.QException
/**
* A collection of utilities for modifying java.io.
@ -12,7 +13,7 @@ object IOUtils extends Logging {
* Checks if the temp directory has been setup and throws an exception if they user hasn't set it correctly.
* @param tempDir Temporary directory.
*/
def checkTempDir(tempDir: File) = {
def checkTempDir(tempDir: File) {
val tempDirPath = tempDir.getAbsolutePath
// Keeps the user from leaving the temp directory as the default, and on Macs from having pluses
// in the path which can cause problems with the Google Reflections library.
@ -20,7 +21,7 @@ object IOUtils extends Logging {
if (tempDirPath.startsWith("/var/folders/") || (tempDirPath == "/tmp") || (tempDirPath == "/tmp/"))
throw new UserException.BadTmpDir("java.io.tmpdir must be explicitly set")
if (!tempDir.exists && !tempDir.mkdirs)
throw new UserException.BadTmpDir("Could not create directory: " + tempDir.getAbsolutePath())
throw new UserException.BadTmpDir("Could not create directory: " + tempDir.getAbsolutePath)
}
/**
@ -35,9 +36,9 @@ object IOUtils extends Logging {
throw new UserException.BadTmpDir("Could not create temp directory: " + tempDirParent)
val temp = File.createTempFile(prefix + "-", suffix, tempDirParent)
if (!temp.delete)
throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath())
throw new UserException.BadTmpDir("Could not delete sub file: " + temp.getAbsolutePath)
if (!temp.mkdir)
throw new UserException.BadTmpDir("Could not create sub directory: " + temp.getAbsolutePath())
throw new UserException.BadTmpDir("Could not create sub directory: " + temp.getAbsolutePath)
absolute(temp)
}
@ -46,7 +47,7 @@ object IOUtils extends Logging {
* @param file File to write to.
* @param content Content to write.
*/
def writeContents(file: File, content: String) = FileUtils.writeStringToFile(file, content)
def writeContents(file: File, content: String) { FileUtils.writeStringToFile(file, content) }
/**
* Reads content of a file into a string.
@ -146,10 +147,12 @@ object IOUtils extends Logging {
* @return The absolute path to the file in the parent dir if the path was not absolute, otherwise the original path.
*/
def absolute(parent: File, file: File): File = {
if (file.isAbsolute)
absolute(file)
else
absolute(new File(parent, file.getPath))
val newPath =
if (file.isAbsolute)
absolutePath(file)
else
absolutePath(new File(parent, file.getPath))
replacePath(file, newPath)
}
/**
@ -159,12 +162,16 @@ object IOUtils extends Logging {
* @return the absolute path to the file.
*/
def absolute(file: File) = {
replacePath(file, absolutePath(file))
}
private def absolutePath(file: File) = {
var fileAbs = file.getAbsoluteFile
var names = List.empty[String]
while (fileAbs != null) {
val name = fileAbs.getName
fileAbs = fileAbs.getParentFile
if (name == ".") {
/* skip */
@ -190,7 +197,18 @@ object IOUtils extends Logging {
}
}
new File(names.mkString("/", "/", ""))
names.mkString("/", "/", "")
}
private def replacePath(file: File, path: String) = {
file match {
case fileExtension: FileExtension =>
fileExtension.withPath(path)
case file: File =>
if (file.getClass != classOf[File])
throw new QException("Sub classes of java.io.File must also implement FileExtension so that the path can be modified.")
new File(path)
}
}
/**