Merge branch 'master' of ssh://chartl@tin.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Christopher Hartl 2011-09-21 16:40:47 -04:00
commit ef05827c7b
70 changed files with 1586 additions and 556 deletions

View File

@ -163,6 +163,14 @@
<!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
<delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
<delete file="${ivy.jar.dir}/ivy-2.2.0-rc1.jar"/>
<!--
An old version of the ivy-1.4.1.xml file does not contain /ivy-module/configuration/conf/@name="compile".
Easier to upgrade to 1.4.4 than try to deal with xmlproperty and conditional deletion in ant.
Just in case we remove explicit 1.4.4 and go back to 1.4.1, try to clean out the file for now.
-->
<delete file="${ivy.home}/cache/javax.mail/mail/ivy-1.4.1.xml"/>
<delete file="${ivy.home}/cache/javax.mail/mail/ivydata-1.4.1.properties"/>
<delete file="${ivy.home}/cache/javax.mail/mail/jars/mail-1.4.1.jar"/>
</target>
<target name="init.buildall">

View File

@ -15,10 +15,8 @@
<!-- Tribble -->
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
<dependency org="log4j" name="log4j" rev="1.2.15">
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
<exclude org="javax.mail" />
</dependency>
<dependency org="log4j" name="log4j" rev="1.2.15"/>
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
<dependency org="colt" name="colt" rev="1.2.0"/>
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>

View File

@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeCovariates/";
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
private String PATH_TO_RESOURCES = "public/R/";

View File

@ -929,6 +929,14 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader(reader);
}
/**
* Gets the master sequence dictionary for this GATK engine instance
* @return a never-null dictionary listing all of the contigs known to this engine instance
*/
public SAMSequenceDictionary getMasterSequenceDictionary() {
return getReferenceDataSource().getReference().getSequenceDictionary();
}
/**
* Returns data source object encapsulating all essential info and handlers used to traverse
* reads; header merger, individual file readers etc can be accessed through the returned data source object.
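The new accessor gives every downstream component one authoritative dictionary to consult. A minimal usage sketch, assuming access to a configured engine instance (the ContigLister class below is illustrative, not part of this commit):

    import net.sf.samtools.SAMSequenceDictionary;
    import net.sf.samtools.SAMSequenceRecord;

    public class ContigLister {
        // Print every contig known to the engine, one "name<TAB>length" pair per line.
        public static void printContigs(final GenomeAnalysisEngine engine) {
            final SAMSequenceDictionary dict = engine.getMasterSequenceDictionary();
            for (final SAMSequenceRecord seq : dict.getSequences())
                System.out.printf("%s\t%d%n", seq.getSequenceName(), seq.getSequenceLength());
        }
    }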

View File

@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
else if ( stub.getOutputStream() != null ) {
this.file = null;
this.stream = stub.getOutputStream();
writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes());
writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes());
}
else
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
}
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.io.stubs;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
return isCompressed;
}
/**
* Gets the master sequence dictionary from the engine associated with this stub
* @see GenomeAnalysisEngine#getMasterSequenceDictionary()
* @return the engine's master sequence dictionary
*/
public SAMSequenceDictionary getMasterSequenceDictionary() {
return engine.getMasterSequenceDictionary();
}
/**
* Should we tell the VCF writer not to write genotypes?
* @return true if the writer should not write genotypes.

View File

@ -101,7 +101,7 @@ public class RMDIndexer extends CommandLineProgram {
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
// add writing of the sequence dictionary, if supplied
builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);
builder.validateAndUpdateIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary());
// create the output stream, and write the index
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.index.Index;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
* Utilities for working with Sequence Dictionaries embedded in tribble indices
*
*/
public class IndexDictionaryUtils {
private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class);
// a constant we use for marking sequence dictionary entries in the Tribble index property list
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
/**
* get the sequence dictionary stored in the index's property list, if present
* @param index the index to read from
* @return the dictionary stored in the index properties; empty if none was stored
*/
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
Integer.valueOf(entry.getValue())));
}
return dict;
}
/**
* create the sequence dictionary with the contig list; a backup approach
* @param index the index file to use
* @param dict the sequence dictionary to add contigs to
* @return the filled-in sequence dictionary
*/
static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
LinkedHashSet<String> seqNames = index.getSequenceNames();
if (seqNames == null) {
return dict;
}
for (String name : seqNames) {
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
dict.addSequence(seq);
}
return dict;
}
public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
for ( SAMSequenceRecord seq : dict.getSequences() ) {
final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
final String length = String.valueOf(seq.getSequenceLength());
index.addProperty(contig,length);
}
}
public static void validateTrackSequenceDictionary(final String trackName,
final SAMSequenceDictionary trackDict,
final SAMSequenceDictionary referenceDict,
final ValidationExclusion.TYPE validationExclusionType ) {
// if the sequence dictionary is null (i.e., the track has no embedded dictionary) or empty, skip validation
if (trackDict == null || trackDict.size() == 0)
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
else {
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
}
}
}
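The DICT: predicate above round-trips a dictionary through the index's string property map. A small sketch, assuming idx is a freshly created tribble Index and refDict is the reference dictionary (both names illustrative):

    // Stamp the dictionary into the index, then read it back out.
    IndexDictionaryUtils.setIndexSequenceDictionary(idx, refDict);  // writes "DICT:<contig>" -> "<length>" properties
    SAMSequenceDictionary recovered =
            IndexDictionaryUtils.getSequenceDictionaryFromProperties(idx);
    assert recovered.size() == refDict.size();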

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -52,16 +50,11 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
*
* @author aaron
*
* @author aaron
*
* Class RMDTrackBuilder
*
@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
// a constant we use for marking sequence dictionary entries in the Tribble index property list
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
// private sequence dictionary we use to set our tracks with
private SAMSequenceDictionary dict = null;
@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
catch (ReviewedStingException e) { }
sequenceDictionary = getSequenceDictionaryFromProperties(index);
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
if (sequenceDictionary.size() == 0 && dict != null) {
File indexFile = Tribble.indexFile(inputFile);
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
sequenceDictionary = getSequenceDictionaryFromProperties(index);
validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);
try { // re-write the index
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
}
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
@ -363,88 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
// this can take a while, let them know what we're doing
logger.info("Creating Tribble index in memory for file " + inputFile);
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
setIndexSequenceDictionary(inputFile, idx, dict, null, false);
validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
return idx;
}
// ---------------------------------------------------------------------------------------------------------
// static functions to work with the sequence dictionaries of indexes
// ---------------------------------------------------------------------------------------------------------
/**
* get the sequence dictionary stored in the index's property list, if present
* @param index the index to read from
* @return the dictionary stored in the index properties; empty if none was stored
*/
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
Integer.valueOf(entry.getValue())));
}
return dict;
}
/**
* create the sequence dictionary with the contig list; a backup approach
* @param index the index file to use
* @param dict the sequence dictionary to add contigs to
* @return the filled-in sequence dictionary
*/
private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
LinkedHashSet<String> seqNames = index.getSequenceNames();
if (seqNames == null) {
return dict;
}
for (String name : seqNames) {
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
dict.addSequence(seq);
}
return dict;
}
/**
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
* (that each contig in the index is in the sequence dictionary).
* @param inputFile for proper error message formatting.
* @param dict the sequence dictionary
* @param index the index file
* @param indexFile the index file
* @param rewriteIndex should we rewrite the index when we're done?
*
*/
public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) {
if (dict == null) return;
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) {
if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null");
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
for (SAMSequenceRecord seq : currentDict.getSequences()) {
if (dict.getSequence(seq.getSequenceName()) == null)
continue;
index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength()));
}
// re-write the index
if (rewriteIndex) try {
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict);
// actually update the dictionary in the index
IndexDictionaryUtils.setIndexSequenceDictionary(index, dict);
}
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
// if the sequence dictionary is null (i.e., the track has no embedded dictionary) or empty, skip validation
if (trackDict == null || trackDict.size() == 0)
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
else {
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
}
public void validateTrackSequenceDictionary(final String trackName,
final SAMSequenceDictionary trackDict,
final SAMSequenceDictionary referenceDict ) {
IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType);
}
}

View File

@ -358,7 +358,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
public void printOnTraversalDone() {
printProgress(null, null, true);
final double elapsed = timer.getElapsedTime();
final double elapsed = timer == null ? 0 : timer.getElapsedTime();
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics();

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -77,6 +78,15 @@ public abstract class Walker<MapType, ReduceType> {
return toolkit;
}
/**
* Gets the master sequence dictionary for this walker
* @see GenomeAnalysisEngine#getMasterSequenceDictionary()
* @return the toolkit's master sequence dictionary
*/
protected SAMSequenceDictionary getMasterSequenceDictionary() {
return getToolkit().getMasterSequenceDictionary();
}
/**
* (conceptual static) method that states whether you want to see reads piling up at a locus
* that contain a deletion at the locus.

View File

@ -26,7 +26,7 @@ public class SBByDepth extends AnnotationByDepth {
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
return null;
double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY));
double sBias = vc.getAttributeAsDouble(VCFConstants.STRAND_BIAS_KEY, -1);
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )
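This hunk is one instance of a commit-wide migration from the nullable getAttributeAs*NoException accessors to overloads that take an explicit default. A hedged sketch of the sentinel pattern (the helper name and sentinel value are illustrative):

    // Hypothetical helper: fetch STRAND_BIAS as a Double, or null when absent.
    private static Double getStrandBias(final VariantContext vc) {
        final double sb = vc.getAttributeAsDouble(VCFConstants.STRAND_BIAS_KEY, -1);
        return sb == -1 ? null : sb;  // -1 plays the role the old null return did
    }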

View File

@ -58,6 +58,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
// When we write the SnpEff version number and command line to the output VCF, we change
// the key name slightly so that the output VCF won't be confused in the future for an
// output file produced by SnpEff directly:
public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY;
public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY;
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
// field annotation with the key EFF:
@ -165,10 +172,26 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
UNKNOWN
}
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
// without providing a SnpEff rod via --snpEffFile):
validateRodBinding(walker.getSnpEffRodBinding());
checkSnpEffVersion(walker, toolkit);
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
// the SnpEff rod, and that the file was generated by a supported version of SnpEff:
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
checkSnpEffVersion(snpEffVersionLine);
checkSnpEffCommandLine(snpEffCommandLine);
// If everything looks ok, add the SnpEff version number and command-line header lines to the
// header of the VCF output file, changing the key names so that our output file won't be
// mistaken in the future for a SnpEff output file:
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
}
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
@ -204,12 +227,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
}
}
private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) {
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
@ -224,6 +242,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
}
}
private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) {
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " +
"input file, which should be added by all supported versions of SnpEff (" +
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
}
}
private boolean isSupportedSnpEffVersion ( String versionString ) {
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
if ( supportedVersion.equals(versionString) ) {
@ -248,10 +274,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
List<String> individualEffects;
if ( effectFieldValue == null ) {
return parsedEffects;
}
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
List<String> individualEffects;
if ( effectFieldValue instanceof List ) {
individualEffects = (List<String>)effectFieldValue;
}
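The comment in this hunk describes the codec's polymorphic INFO storage, which every reader of a multi-valued field has to normalize. A hedged sketch of that normalization on its own (the asStringList helper is invented for illustration; assumes java.util.Collections and java.util.List are imported):

    // Normalize an INFO value that the codec may hand back as String or List<String>.
    @SuppressWarnings("unchecked")
    private static List<String> asStringList(final Object fieldValue) {
        if ( fieldValue instanceof List )
            return (List<String>) fieldValue;                    // multi-valued field
        return Collections.singletonList((String) fieldValue);   // single value
    }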

View File

@ -214,8 +214,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
engine.initializeExpressions(expressionsToUse);
engine.invokeAnnotationInitializationMethods();
// setup the header fields
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
@ -225,6 +223,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
hInfo.add(line);
}
engine.invokeAnnotationInitializationMethods(hInfo);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);

View File

@ -114,13 +114,13 @@ public class VariantAnnotatorEngine {
dbAnnotations.put(rod, rod.getName());
}
public void invokeAnnotationInitializationMethods() {
public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
annotation.initialize(walker, toolkit);
annotation.initialize(walker, toolkit, headerLines);
}
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
annotation.initialize(walker, toolkit);
annotation.initialize(walker, toolkit, headerLines);
}
}

View File

@ -25,9 +25,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.List;
import java.util.Set;
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
public abstract class VariantAnnotatorAnnotation {
@ -35,5 +37,5 @@ public abstract class VariantAnnotatorAnnotation {
public abstract List<String> getKeyNames();
// initialization method (optional for subclasses, and therefore non-abstract)
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { }
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) { }
}
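With the widened initialize signature, an annotation can register its own header lines at setup time. A hypothetical subclass sketch under the new API (the class name and header key are invented for illustration):

    public class MyVersionedAnnotation extends VariantAnnotatorAnnotation {
        @Override
        public void initialize(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit,
                               Set<VCFHeaderLine> headerLines) {
            // Advertise this annotation in the output VCF header.
            headerLines.add(new VCFHeaderLine("MyVersionedAnnotation", "0.1"));
        }

        @Override
        public List<String> getKeyNames() {
            return java.util.Collections.singletonList("MYANN");
        }
    }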

View File

@ -63,9 +63,12 @@ import java.util.*;
* <h2>Input</h2>
* <p>
* One or more bam files (with proper headers) to be analyzed for coverage statistics
* (Optional) A REFSEQ Rod to aggregate coverage to the gene level
* </p>
*
* <p>
* (Optional) A REFSEQ Rod to aggregate coverage to the gene level
* (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation)
* </p>
* <h2>Output</h2>
* <p>
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
@ -93,7 +96,7 @@ import java.util.*;
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T VariantEval \
* -T DepthOfCoverage \
* -o file_name_base \
* -I input_bams.list
* [-geneList refSeq.sorted.txt] \

View File

@ -276,8 +276,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
if ( elt.isReducedRead() ) {
// reduced read representation
byte qual = elt.getReducedQual();
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
return elt.getReducedCount(); // we added nObs bases here
if ( BaseUtils.isRegularBase( elt.getBase() )) {
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
return elt.getReducedCount(); // we added nObs bases here
} else // odd bases or deletions => don't use them
return 0;
} else {
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;

View File

@ -73,7 +73,7 @@ public class HaplotypeIndelErrorModel {
baseMatchArray = new double[MAX_CACHED_QUAL+1];
baseMismatchArray = new double[MAX_CACHED_QUAL+1];
for (int k=1; k <= MAX_CACHED_QUAL; k++) {
double baseProb = QualityUtils.qualToProb(k);
double baseProb = QualityUtils.qualToProb((byte)k);
baseMatchArray[k] = probToQual(baseProb);

View File

@ -37,7 +37,7 @@ public class PhasingRead extends BaseArray {
public PhasingRead(int length, int mappingQual) {
super(length);
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb(mappingQual));
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb((byte)mappingQual));
this.baseProbs = new PreciseNonNegativeDouble[length];
Arrays.fill(this.baseProbs, null);

View File

@ -44,12 +44,12 @@ public class RefSeqDataParser {
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
Map<String, String> entriesToNames = new HashMap<String, String>();
Integer numRecords = vc.getAttributeAsIntegerNoException(NUM_RECORDS_KEY);
if (numRecords != null) {
int numRecords = vc.getAttributeAsInt(NUM_RECORDS_KEY, -1);
if (numRecords != -1) {
boolean done = false;
if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
String name = vc.getAttributeAsString(nameKeyToUse, null);
if (name != null) {
entriesToNames.put(nameKeyToUse, name);
done = true;
@ -59,14 +59,14 @@ public class RefSeqDataParser {
if (!done) {
for (int i = 1; i <= numRecords; i++) {
String key = nameKeyToUseMultiplePrefix + i;
String name = vc.getAttributeAsStringNoException(key);
String name = vc.getAttributeAsString(key, null);
if (name != null)
entriesToNames.put(key, name);
}
}
}
else { // no entry with the # of records:
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
String name = vc.getAttributeAsString(nameKeyToUse, null);
if (name != null) {
entriesToNames.put(nameKeyToUse, name);
}

View File

@ -110,12 +110,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
case SNP:
nVariantLoci++;
nSNPs++;
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
break;
case MNP:
nVariantLoci++;
nMNPs++;
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
break;
case INDEL:
nVariantLoci++;
@ -137,7 +137,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
String refStr = vc1.getReference().getBaseString().toUpperCase();
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null;
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null;
// if (aaStr.equals(".")) {
// aaStr = refStr;
// }

View File

@ -219,7 +219,8 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
}
public static Double getPQ(Genotype gt) {
return gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
return d == -1 ? null : d;
}
public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {

View File

@ -120,7 +120,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
if ( eval.hasGenotypes() )
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
else if ( eval.hasAttribute("AC") ) {
ac = Integer.valueOf(eval.getAttributeAsString("AC"));
ac = eval.getAttributeAsInt("AC", -1);
}
if ( ac != -1 ) {

View File

@ -49,18 +49,14 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
else nTv++;
}
String refStr = vc.getReference().getBaseString().toUpperCase();
String aaStr = vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase();
if (aaStr != null && !aaStr.equalsIgnoreCase("null") && !aaStr.equals(".")) {
BaseUtils.BaseSubstitutionType aaSubType = BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0]);
//System.out.println(refStr + " " + vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() + " " + aaSubType);
if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSITION) {
nTiDerived++;
} else if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSVERSION) {
nTvDerived++;
if (vc.hasAttribute("ANCESTRALALLELE")) {
final String aaStr = vc.getAttributeAsString("ANCESTRALALLELE", "null").toUpperCase();
if ( ! aaStr.equals(".") ) {
switch ( BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0] ) ) {
case TRANSITION: nTiDerived++; break;
case TRANSVERSION: nTvDerived++; break;
default: break;
}
}
}
}

View File

@ -131,7 +131,7 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
//// System.out.printf(" ac = %d%n", ac);
}
else
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
} else {
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do

View File

@ -44,7 +44,7 @@ public class AlleleCount extends VariantStratifier {
if (eval != null) {
int AC = -1;
if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
AC = eval.getAttributeAsInt("AC");
AC = eval.getAttributeAsInt("AC", 0);
} else if ( eval.isVariant() ) {
for (Allele allele : eval.getAlternateAlleles())
AC = Math.max(AC, eval.getChromosomeCount(allele));

View File

@ -28,7 +28,7 @@ public class AlleleFrequency extends VariantStratifier {
if (eval != null) {
try {
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF") / 5.0, 3))));
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF", 0.0) / 5.0, 3))));
} catch (Exception e) {
return relevantStates;
}

View File

@ -90,8 +90,8 @@ public class Degeneracy extends VariantStratifier {
Integer frame = null;
if (eval.hasAttribute("refseq.functionalClass")) {
aa = eval.getAttributeAsString("refseq.variantAA");
frame = eval.getAttributeAsInt("refseq.frame");
aa = eval.getAttributeAsString("refseq.variantAA", null);
frame = eval.getAttributeAsInt("refseq.frame", 0);
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
int annotationId = 1;
String key;
@ -99,7 +99,7 @@ public class Degeneracy extends VariantStratifier {
do {
key = String.format("refseq.functionalClass_%d", annotationId);
String newtype = eval.getAttributeAsString(key);
String newtype = eval.getAttributeAsString(key, null);
if ( newtype != null &&
( type == null ||
@ -109,13 +109,13 @@ public class Degeneracy extends VariantStratifier {
type = newtype;
String aakey = String.format("refseq.variantAA_%d", annotationId);
aa = eval.getAttributeAsString(aakey);
aa = eval.getAttributeAsString(aakey, null);
if (aa != null) {
String framekey = String.format("refseq.frame_%d", annotationId);
if (eval.hasAttribute(framekey)) {
frame = eval.getAttributeAsInt(framekey);
frame = eval.getAttributeAsInt(framekey, 0);
}
}
}

View File

@ -28,7 +28,7 @@ public class FunctionalClass extends VariantStratifier {
}
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("all");
@ -38,7 +38,7 @@ public class FunctionalClass extends VariantStratifier {
if (eval.hasAttribute("refseq.functionalClass")) {
try {
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass"));
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass", null));
} catch ( Exception e ) {} // don't error out if the type isn't supported
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
int annotationId = 1;
@ -47,7 +47,7 @@ public class FunctionalClass extends VariantStratifier {
do {
key = String.format("refseq.functionalClass_%d", annotationId);
String newtypeStr = eval.getAttributeAsString(key);
String newtypeStr = eval.getAttributeAsString(key, null);
if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
try {
FunctionalType newType = FunctionalType.valueOf(newtypeStr);

View File

@ -115,7 +115,7 @@ public class VQSRCalibrationCurve {
if ( vc.isFiltered() )
return 0.0;
else if ( vc.hasAttribute(VQSRQualKey) ) {
double qual = vc.getAttributeAsDouble(VQSRQualKey);
double qual = vc.getAttributeAsDouble(VQSRQualKey, 0.0);
return probTrueVariant(qual);
} else {
throw new UserException.VariantContextMissingRequiredField(VQSRQualKey, vc);
@ -143,7 +143,7 @@ public class VQSRCalibrationCurve {
for ( int i = 0; i < log10Likelihoods.length; i++) {
double p = Math.pow(10, log10Likelihoods[i]);
double q = alpha * p + (1-alpha) * noInfoPr;
if ( DEBUG ) System.out.printf(" vqslod = %.2f, p = %.2e, alpha = %.2e, q = %.2e%n", vc.getAttributeAsDouble(VQSRQualKey), p, alpha, q);
if ( DEBUG ) System.out.printf(" vqslod = %.2f, p = %.2e, alpha = %.2e, q = %.2e%n", vc.getAttributeAsDouble(VQSRQualKey, 0.0), p, alpha, q);
updated[i] = Math.log10(q);
}

View File

@ -155,7 +155,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
private double[] TS_TRANCHES = new double[] {100.0, 99.9, 99.0, 90.0};
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
private String[] IGNORE_INPUT_FILTERS = null;
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required=false)
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required=false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
private String RSCRIPT_FILE = null;

View File

@ -99,7 +99,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
final VCFHeader vcfHeader = new VCFHeader(metaData, samples);
writer = new StandardVCFWriter(file, false);
writer = new StandardVCFWriter(file, getMasterSequenceDictionary(), false);
writer.writeHeader(vcfHeader);
}

View File

@ -75,7 +75,7 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
vcfWriter1.writeHeader(new VCFHeader(hInfo, samples));
vcfWriter2 = new StandardVCFWriter(file2, true);
vcfWriter2 = new StandardVCFWriter(file2, getMasterSequenceDictionary(), true);
vcfWriter2.writeHeader(new VCFHeader(hInfo, samples));
}

View File

@ -574,7 +574,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
// We then pick a variant with probablity AF*desiredFraction
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY);
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY, null);
double af;
double afBoost = 1.0;

View File

@ -192,7 +192,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
if ( getters.containsKey(field) ) {
val = getters.get(field).get(vc);
} else if ( vc.hasAttribute(field) ) {
val = vc.getAttributeAsString(field);
val = vc.getAttributeAsString(field, null);
} else if ( isWildCard(field) ) {
Set<String> wildVals = new HashSet<String>();
for ( Map.Entry<String,Object> elt : vc.getAttributes().entrySet()) {

View File

@ -9,14 +9,17 @@ import net.sf.samtools.SAMUtils;
* @author Kiran Garimella
*/
public class QualityUtils {
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
public final static double MIN_REASONABLE_ERROR = 0.0001;
public final static byte MAX_REASONABLE_Q_SCORE = 40;
public final static byte MIN_USABLE_Q_SCORE = 6;
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
private static double[] qualToErrorProbCache = new double[256];
static {
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw((byte)i);
}
/**
* Private constructor. No instantiating this class!
*/
@ -33,10 +36,6 @@ public class QualityUtils {
return 1.0 - qualToErrorProb(qual);
}
static public double qualToProb(int qual) {
return qualToProb( (double)qual );
}
static public double qualToProb(double qual) {
return 1.0 - Math.pow(10.0, qual/(-10.0));
}
@ -48,10 +47,14 @@ public class QualityUtils {
* @param qual a quality score (0-40)
* @return a probability (0.0-1.0)
*/
static public double qualToErrorProb(byte qual) {
static public double qualToErrorProbRaw(byte qual) {
return Math.pow(10.0, ((double) qual)/-10.0);
}
static public double qualToErrorProb(byte qual) {
return qualToErrorProbCache[qual];
}
/**
* Convert a probability to a quality score. Note, this is capped at Q40.
*
@ -110,88 +113,4 @@ public class QualityUtils {
//return (byte) Math.min(qual, maxQual);
return (byte) Math.max(Math.min(qual, maxQual), 1);
}
/**
* Compress a base and a probability into a single byte so that it can be output in a SAMRecord's SQ field.
* Note: the highest probability this function can encode is 64%, so this function should never be used on the best base hypothesis.
* Another note: the probability encoded here gets rounded to the nearest 1%.
*
* @param baseIndex the base index
* @param prob the base probability
* @return a byte containing the index and the probability
*/
static public byte baseAndProbToCompressedQuality(int baseIndex, double prob) {
byte compressedQual = 0;
compressedQual = (byte) baseIndex;
byte cprob = (byte) (100.0*prob);
byte qualmask = (byte) 252;
compressedQual += ((cprob << 2) & qualmask);
return compressedQual;
}
/**
* From a compressed base, extract the base index (0:A, 1:C, 2:G, 3:T)
*
* @param compressedQual the compressed quality score, as returned by baseAndProbToCompressedQuality
* @return base index
*/
static public int compressedQualityToBaseIndex(byte compressedQual) {
return (int) (compressedQual & 0x3);
}
/**
* From a compressed base, extract the base probability
*
* @param compressedQual the compressed quality score, as returned by baseAndProbToCompressedQuality
* @return the probability
*/
static public double compressedQualityToProb(byte compressedQual) {
// Because java natives are signed, extra care must be taken to avoid
// shifting a 1 into the sign bit in the implicit promotion of 2 to an int.
int x2 = ((int) compressedQual) & 0xff;
x2 = (x2 >>> 2);
return ((double) x2)/100.0;
}
/**
* Return the complement of a compressed quality
*
* @param compressedQual the compressed quality score (as returned by baseAndProbToCompressedQuality)
* @return the complementary compressed quality
*/
static public byte complementCompressedQuality(byte compressedQual) {
int baseIndex = compressedQualityToBaseIndex(compressedQual);
double prob = compressedQualityToProb(compressedQual);
return baseAndProbToCompressedQuality(BaseUtils.complementIndex(baseIndex), prob);
}
/**
* Return the reverse complement of a byte array of compressed qualities
*
* @param compressedQuals a byte array of compressed quality scores
* @return the reverse complement of the byte array
*/
static public byte[] reverseComplementCompressedQualityArray(byte[] compressedQuals) {
byte[] rcCompressedQuals = new byte[compressedQuals.length];
for (int pos = 0; pos < compressedQuals.length; pos++) {
rcCompressedQuals[compressedQuals.length - pos - 1] = complementCompressedQuality(compressedQuals[pos]);
}
return rcCompressedQuals;
}
/**
* Return the reverse of a byte array of qualities (compressed or otherwise)
* @param quals the array of bytes to be reversed
* @return the reverse of the quality array
*/
static public byte[] reverseQualityArray( byte[] quals ) {
return Utils.reverse(quals); // no sense in duplicating functionality
}
}
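The new qualToErrorProbCache trades 256 doubles of memory for skipping Math.pow on a hot path. A self-contained sketch of the same memoization pattern, independent of the GATK classes (names illustrative):

    public final class QualCache {
        private static final double[] CACHE = new double[256];
        static {
            for ( int i = 0; i < 256; i++ )
                CACHE[i] = Math.pow(10.0, i / -10.0);  // raw Phred formula, computed once
        }

        // Index with the unsigned value so a signed byte can't produce a negative index.
        public static double qualToErrorProb(final byte qual) {
            return CACHE[qual & 0xFF];
        }
    }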

View File

@ -53,7 +53,7 @@ public class RScriptExecutor {
public static class RScriptArgumentCollection {
@Advanced
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
public String PATH_TO_RSCRIPT = "Rscript";
@Advanced

View File

@ -240,22 +240,34 @@ public class Utils {
return ret.toString();
}
//public static String join(String separator, Collection<String> strings) {
// return join( separator, strings.toArray(new String[0]) );
//}
public static <T> String join(String separator, Collection<T> objects) {
if(objects.isEmpty()) {
/**
* Returns a string of the form elt1.toString() [sep elt2.toString() ... sep eltN.toString()] for a collection of
* elti objects (note there's no actual space between sep and the elti elements). Returns
* "" if the collection is empty. If the collection contains just elt, then returns elt.toString()
*
* @param separator the string to use to separate objects
* @param objects a collection of objects. the element order is defined by the iterator over objects
* @param <T> the type of the objects
* @return a non-null string
*/
public static <T> String join(final String separator, final Collection<T> objects) {
if (objects.isEmpty()) { // fast path for empty collection
return "";
}
Iterator<T> iter = objects.iterator();
final StringBuilder ret = new StringBuilder(iter.next().toString());
while(iter.hasNext()) {
ret.append(separator);
ret.append(iter.next().toString());
}
} else {
final Iterator<T> iter = objects.iterator();
final T first = iter.next();
return ret.toString();
if ( ! iter.hasNext() ) // fast path for singleton collections
return first.toString();
else { // full path for 2+ collection that actually need a join
final StringBuilder ret = new StringBuilder(first.toString());
while(iter.hasNext()) {
ret.append(separator);
ret.append(iter.next().toString());
}
return ret.toString();
}
}
}
public static String dupString(char c, int nCopies) {
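A quick usage sketch exercising the three paths of the rewritten join (empty, singleton, and general), with illustrative values; assumes java.util.Arrays and java.util.Collections are imported:

    Utils.join(",", Collections.<String>emptyList());   // -> ""
    Utils.join(",", Collections.singletonList("a"));    // -> "a"
    Utils.join(",", Arrays.asList("a", "b", "c"));      // -> "a,b,c"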

View File

@ -5,6 +5,7 @@ import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -131,19 +132,18 @@ public class BAQ {
private final static double EM = 0.33333333333;
private final static double EI = 0.25;
private double[][][] EPSILONS = new double[256][256][64];
private double[][][] EPSILONS = new double[256][256][SAMUtils.MAX_PHRED_SCORE+1];
private void initializeCachedData() {
for ( int i = 0; i < 256; i++ )
for ( int j = 0; j < 256; j++ )
for ( int q = 0; q < 64; q++ ) {
double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
EPSILONS[i][j][q] = 1.0;
}
for ( char b1 : "ACGTacgt".toCharArray() ) {
for ( char b2 : "ACGTacgt".toCharArray() ) {
for ( int q = 0; q < 64; q++ ) {
for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
double e = Character.toLowerCase(b1) == Character.toLowerCase(b2) ? 1 - qual : qual * EM;
EPSILONS[(byte)b1][(byte)b2][q] = e;
@ -152,7 +152,7 @@ public class BAQ {
}
}
private double calcEpsilon( byte ref, byte read, byte qualB ) {
protected double calcEpsilon( byte ref, byte read, byte qualB ) {
return EPSILONS[ref][read][qualB];
}
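calcEpsilon is now protected rather than private, so a subclass can observe or adapt the cached table. A hypothetical subclass sketch (assumes BAQ has an accessible no-arg constructor; the CountingBAQ class is invented for illustration):

    public class CountingBAQ extends BAQ {
        private long lookups = 0;

        @Override
        protected double calcEpsilon(byte ref, byte read, byte qualB) {
            lookups++;  // count cache lookups for profiling
            return super.calcEpsilon(ref, read, qualB);
        }
    }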

View File

@ -324,6 +324,8 @@ public class ClippingOp {
if (index <= stop && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next();
else
break;
}
// add the remaining cigar elements
@ -363,6 +365,8 @@ public class ClippingOp {
index += shift;
if (index < start && cigarElementIterator.hasNext())
cigarElement = cigarElementIterator.next();
else
break;
}
// check if we are hard clipping indels

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.utils.clipreads;
import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -56,17 +58,34 @@ public class ReadClipper {
return hardClipByReferenceCoordinates(refStart, -1);
}
private int numDeletions(SAMRecord read) {
int result = 0;
for (CigarElement e: read.getCigar().getCigarElements()) {
if ( e.getOperator() == CigarOperator.D )
result += e.getLength(); // accumulate the total deleted length across all deletion elements
}
return result;
}
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
if (start < 0 || stop > read.getReadLength() - 1)
if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read))
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
// TODO add requires statement/check in the Hardclip function
// TODO add check in the Hardclip function
if ( start > stop )
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
// This tries to fix the bug where a deletion is counted as a read base; as a result, the hardClipper ran into
// an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
stop -= numDeletions(read);
if ( start > stop )
start -= numDeletions(read);
//System.out.println("Clipping start/stop: " + start + "/" + stop);
this.addOp(new ClippingOp(start, stop));
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);

View File

@ -12,19 +12,35 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.ArrayList;
/**
* TODO FOR CHRIS HARTL
* Allows for reading in RefSeq information
*
* <p>
* Codec Description
* Parses a sorted UCSC RefSeq file (see below) into relevant features: the gene name, the unique gene name (if multiple transcripts get separate entries), exons, gene start/stop, coding start/stop,
* strandedness of transcription.
* </p>
*
* <p>
* See also: link to file specification
* Instructions for generating a RefSeq file for use with the RefSeq codec can be found on the Wiki here
* <a href="http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq">http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq</a>
* </p>
* <h2> Usage </h2>
* The RefSeq Rod can be bound as any other rod, and is specified by REFSEQ, for example
* <pre>
* -refSeqBinding:REFSEQ /path/to/refSeq.txt
* </pre>
*
* You will need to consult individual walkers for the binding name ("refSeqBinding", above)
*
* <h2>File format example</h2>
* If you want to define your own file for use, the format is (tab delimited):
* bin, name, chrom, strand, transcription start, transcription end, coding start, coding end, num exons, exon starts, exon ends, id, alt. name, coding start status (complete/incomplete), coding end status (complete/incomplete),
* and exon frames, for example:
* <pre>
* 76 NM_001011874 1 - 3204562 3661579 3206102 3661429 3 3204562,3411782,3660632, 3207049,3411982,3661579, 0 Xkr4 cmpl cmpl 1,2,0,
* </pre>
* for more information see <a href="http://skip.ucsc.edu/cgi-bin/hgTables?hgsid=5651&hgta_doSchemaDb=mm8&hgta_doSchemaTable=refGene">here</a>
* <p>
* A BAM file containing <b>exactly one sample</b>.
*
* </p>
*
* @author Mark DePristo

View File

@ -6,6 +6,7 @@ import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.LineReader;
import org.broad.tribble.util.BlockCompressedInputStream;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -215,7 +216,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
// if we don't have a header, or we have a header with no genotyping data, check that we have eight columns; otherwise check that we have nine (normal columns + genotyping data)
if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
" tokens, and saw " + nParts + " )", lineNo);
@ -227,7 +228,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
throw new UserException.MalformedVCF(message, lineNo);
}
private static void generateException(String message, int lineNo) {
protected static void generateException(String message, int lineNo) {
throw new UserException.MalformedVCF(message, lineNo);
}
@ -345,6 +346,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
generateException("The VCF specification requires a valid info field");
if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) {
if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 )
generateException("The VCF specification does not allow for whitespace in the INFO field");
int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR);
for (int i = 0; i < infoValueSplitSize; i++) {
String key;
@ -587,7 +591,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
try {
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) ||
isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
} catch ( FileNotFoundException e ) {
return false;
} catch ( IOException e ) {
@ -598,12 +603,17 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) {
try {
byte[] buff = new byte[MAGIC_HEADER_LINE.length()];
stream.read(buff, 0, MAGIC_HEADER_LINE.length());
String firstLine = new String(buff);
stream.close();
return firstLine.startsWith(MAGIC_HEADER_LINE);
final int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length());
return nread == buff.length && Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes());
} catch ( IOException e ) {
return false;
} catch ( RuntimeException e ) {
return false;
} finally {
try { stream.close(); } catch ( IOException e ) {}
}
}
}

View File

@ -0,0 +1,144 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.vcf;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.Tribble;
import org.broad.tribble.TribbleException;
import org.broad.tribble.index.DynamicIndexCreator;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broad.tribble.util.PositionalStream;
import org.broadinstitute.sting.gatk.refdata.tracks.IndexDictionaryUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
/**
 * base class for VCF writers that can build a Tribble index on the fly as records are written
 */
public abstract class IndexingVCFWriter implements VCFWriter {
final private String name;
private final SAMSequenceDictionary refDict;
private OutputStream outputStream;
private PositionalStream positionalStream = null;
private DynamicIndexCreator indexer = null;
private LittleEndianOutputStream idxStream = null;
@Requires({"name != null",
"! ( location == null && output == null )",
"! ( enableOnTheFlyIndexing && location == null )"})
protected IndexingVCFWriter(final String name, final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing) {
outputStream = output;
this.name = name;
this.refDict = refDict;
if ( enableOnTheFlyIndexing ) {
try {
idxStream = new LittleEndianOutputStream(new FileOutputStream(Tribble.indexFile(location)));
//System.out.println("Creating index on the fly for " + location);
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
indexer.initialize(location, indexer.defaultBinSize());
positionalStream = new PositionalStream(output);
outputStream = positionalStream;
} catch ( IOException ex ) {
// No matter what we keep going, since we don't care if we can't create the index file
idxStream = null;
indexer = null;
positionalStream = null;
}
}
}
@Ensures("result != null")
public OutputStream getOutputStream() {
return outputStream;
}
@Ensures("result != null")
public String getStreamName() {
return name;
}
public abstract void writeHeader(VCFHeader header);
/**
* attempt to close the VCF file
*/
public void close() {
// try to close the index stream (keep it separate to help debugging efforts)
if ( indexer != null ) {
try {
Index index = indexer.finalizeIndex(positionalStream.getPosition());
IndexDictionaryUtils.setIndexSequenceDictionary(index, refDict);
index.write(idxStream);
idxStream.close();
} catch (IOException e) {
throw new ReviewedStingException("Unable to close index for " + getStreamName(), e);
}
}
}
/**
* add a record to the file
*
* @param vc the Variant Context object
*/
public void add(VariantContext vc) {
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
if ( indexer != null )
indexer.addFeature(vc, positionalStream.getPosition());
}
/**
* Returns a reasonable "name" for this writer, to display to the user if something goes wrong
*
* @param location the file being written to, or null if writing to a stream
* @param stream the output stream being written to, used when location is null
* @return a human-readable name for this writer
*/
protected static final String writerName(final File location, final OutputStream stream) {
return location == null ? stream.toString() : location.getAbsolutePath();
}
/**
* Returns an output stream writing to location, or throws a UserException if this fails
* @param location the file to open for writing
* @return a fresh output stream to location
*/
protected static OutputStream openOutputStream(final File location) {
try {
return new FileOutputStream(location);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(location, "Unable to create VCF writer", e);
}
}
}
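
A minimal lifecycle sketch using the concrete subclass below (the output file and dictionary source are assumptions for illustration):

final File out = new File("output.vcf");                        // illustrative path
final SAMSequenceDictionary dict = ref.getSequenceDictionary(); // assumed reference handle
final StandardVCFWriter writer = new StandardVCFWriter(out, dict, true); // on-the-fly indexing
writer.writeHeader(header);
for ( final VariantContext vc : variants )
    writer.add(vc);   // IndexingVCFWriter records each feature's byte offset before writing
writer.close();       // finalizes the index, stamps the dictionary, writes output.vcf.idx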

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.Tribble;
import org.broad.tribble.TribbleException;
import org.broad.tribble.index.DynamicIndexCreator;
@ -44,46 +45,30 @@ import java.util.*;
/**
* this class writes VCF files
*/
public class StandardVCFWriter implements VCFWriter {
public class StandardVCFWriter extends IndexingVCFWriter {
// the print stream we're writing to
final protected BufferedWriter mWriter;
// should we write genotypes or just sites?
final protected boolean doNotWriteGenotypes;
// the VCF header we're storing
protected VCFHeader mHeader = null;
// the print stream we're writing to
protected BufferedWriter mWriter;
protected PositionalStream positionalStream = null;
// were filters applied?
protected boolean filtersWereAppliedToContext = false;
// should we write genotypes or just sites?
protected boolean doNotWriteGenotypes = false;
protected DynamicIndexCreator indexer = null;
protected File indexFile = null;
LittleEndianOutputStream idxStream = null;
File location = null;
/**
* create a VCF writer, given a file to write to
*
* @param location the file location to write to
*/
public StandardVCFWriter(File location) {
this(location, openOutputStream(location), true, false);
public StandardVCFWriter(final File location, final SAMSequenceDictionary refDict) {
this(location, openOutputStream(location), refDict, true, false);
}
public StandardVCFWriter(File location, boolean enableOnTheFlyIndexing) {
this(location, openOutputStream(location), enableOnTheFlyIndexing, false);
}
/**
* create a VCF writer, given a stream to write to
*
* @param output the file location to write to
*/
public StandardVCFWriter(OutputStream output) {
this(output, false);
public StandardVCFWriter(File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing) {
this(location, openOutputStream(location), refDict, enableOnTheFlyIndexing, false);
}
/**
@ -92,33 +77,23 @@ public class StandardVCFWriter implements VCFWriter {
* @param output the file location to write to
* @param doNotWriteGenotypes do not write genotypes
*/
public StandardVCFWriter(OutputStream output, boolean doNotWriteGenotypes) {
mWriter = new BufferedWriter(new OutputStreamWriter(output));
public StandardVCFWriter(final OutputStream output, final SAMSequenceDictionary refDict, final boolean doNotWriteGenotypes) {
this(null, output, refDict, false, doNotWriteGenotypes);
}
public StandardVCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
mWriter = new BufferedWriter(new OutputStreamWriter(getOutputStream())); // todo -- fix buffer size
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
public StandardVCFWriter(File location, OutputStream output, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
this.location = location;
if ( enableOnTheFlyIndexing ) {
indexFile = Tribble.indexFile(location);
try {
idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
//System.out.println("Creating index on the fly for " + location);
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
indexer.initialize(location, indexer.defaultBinSize());
positionalStream = new PositionalStream(output);
output = positionalStream;
} catch ( IOException ex ) {
// No matter what we keep going, since we don't care if we can't create the index file
}
}
//mWriter = new BufferedWriter(new OutputStreamWriter(new PositionalStream(output)));
mWriter = new BufferedWriter(new OutputStreamWriter(output));
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
// --------------------------------------------------------------------------------
//
// VCFWriter interface functions
//
// --------------------------------------------------------------------------------
@Override
public void writeHeader(VCFHeader header) {
mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
@ -158,44 +133,24 @@ public class StandardVCFWriter implements VCFWriter {
mWriter.flush(); // necessary so that writing to an output stream will work
}
catch (IOException e) {
throw new TribbleException("IOException writing the VCF header to " + locationString(), e);
throw new ReviewedStingException("IOException writing the VCF header to " + getStreamName(), e);
}
}
private String locationString() {
return location == null ? mWriter.toString() : location.getAbsolutePath();
}
/**
* attempt to close the VCF file
*/
@Override
public void close() {
// try to close the vcf stream
try {
mWriter.flush();
mWriter.close();
} catch (IOException e) {
throw new TribbleException("Unable to close " + locationString() + " because of " + e.getMessage());
throw new ReviewedStingException("Unable to close " + getStreamName(), e);
}
// try to close the index stream (keep it separate to help debugging efforts)
if ( indexer != null ) {
try {
Index index = indexer.finalizeIndex(positionalStream.getPosition());
index.write(idxStream);
idxStream.close();
} catch (IOException e) {
throw new TribbleException("Unable to close index for " + locationString() + " because of " + e.getMessage());
}
}
}
protected static OutputStream openOutputStream(File location) {
try {
return new FileOutputStream(location);
} catch (FileNotFoundException e) {
throw new TribbleException("Unable to create VCF file at location: " + location);
}
super.close();
}
/**
@ -203,28 +158,17 @@ public class StandardVCFWriter implements VCFWriter {
*
* @param vc the Variant Context object
*/
@Override
public void add(VariantContext vc) {
add(vc, false);
}
/**
* add a record to the file
*
* @param vc the Variant Context object
* @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD)
*/
public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString());
throw new IllegalStateException("The VCF Header must be written before records can be added: " + getStreamName());
if ( doNotWriteGenotypes )
vc = VariantContext.modifyGenotypes(vc, null);
try {
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles);
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition());
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, false);
super.add(vc);
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
@ -275,7 +219,7 @@ public class StandardVCFWriter implements VCFWriter {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
// FILTER
String filters = vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
String filters = getFilterString(vc, filtersWereAppliedToContext);
mWriter.write(filters);
mWriter.write(VCFConstants.FIELD_SEPARATOR);
@ -317,9 +261,22 @@ public class StandardVCFWriter implements VCFWriter {
mWriter.write("\n");
mWriter.flush(); // necessary so that writing to an output stream will work
} catch (IOException e) {
throw new RuntimeException("Unable to write the VCF object to " + locationString());
throw new RuntimeException("Unable to write the VCF object to " + getStreamName());
}
}
// --------------------------------------------------------------------------------
//
// implementation functions
//
// --------------------------------------------------------------------------------
public static final String getFilterString(final VariantContext vc) {
return getFilterString(vc, false);
}
public static final String getFilterString(final VariantContext vc, boolean forcePASS) {
return vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (forcePASS || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
}
private String getQualValue(double qual) {
@ -462,7 +419,7 @@ public class StandardVCFWriter implements VCFWriter {
mWriter.write(encoding);
}
private static String formatVCFField(Object val) {
public static String formatVCFField(Object val) {
String result;
if ( val == null )
result = VCFConstants.MISSING_VALUE_v4;
@ -524,12 +481,11 @@ public class StandardVCFWriter implements VCFWriter {
}
public static int countOccurrences(char c, String s) {
private static int countOccurrences(char c, String s) {
int count = 0;
for (int i = 0; i < s.length(); i++) {
count += s.charAt(i) == c ? 1 : 0;
}
return count;
}
}
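
A small example of the now-public filter-string helper (the VariantContext values are assumed):

// filteredVC has filters {"q10","sb"}; unfilteredVC has none applied.
String s1 = StandardVCFWriter.getFilterString(filteredVC);         // "q10;sb"
String s2 = StandardVCFWriter.getFilterString(unfilteredVC);       // "." (UNFILTERED)
String s3 = StandardVCFWriter.getFilterString(unfilteredVC, true); // "PASS" (forcePASS)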

View File

@ -105,34 +105,37 @@ public class VCFCodec extends AbstractVCFCodec {
* @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF)
*/
protected Set<String> parseFilters(String filterString) {
return parseFilters(filterHash, lineNo, filterString);
}
public static Set<String> parseFilters(final Map<String, LinkedHashSet<String>> cache, final int lineNo, final String filterString) {
// null for unfiltered
if ( filterString.equals(VCFConstants.UNFILTERED) )
return null;
// empty set for passes filters
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
return fFields;
return Collections.emptySet();
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4");
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
if ( filterString.length() == 0 )
generateException("The VCF specification requires a valid filter status");
generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
// do we have the filter string cached?
if ( filterHash.containsKey(filterString) )
return filterHash.get(filterString);
if ( cache != null && cache.containsKey(filterString) )
return Collections.unmodifiableSet(cache.get(filterString));
// empty set for passes filters
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
// otherwise we have to parse and cache the value
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
fFields.add(filterString);
else
fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
filterHash.put(filterString, fFields);
if ( cache != null ) cache.put(filterString, fFields);
return fFields;
return Collections.unmodifiableSet(fFields);
}
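
A short sketch of the new static entry point (GCFHeader.getFilters below calls it with a null cache; the inputs here are illustrative):

final Map<String, LinkedHashSet<String>> cache = new HashMap<String, LinkedHashSet<String>>();
Set<String> f1 = VCFCodec.parseFilters(cache, 1, "q10;sb"); // {"q10","sb"}, now cached
Set<String> f2 = VCFCodec.parseFilters(cache, 2, "PASS");   // empty set: passes filters
Set<String> f3 = VCFCodec.parseFilters(null, -1, ".");      // null: filters not applied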

View File

@ -0,0 +1,256 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.gcf;
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
import java.util.*;
/**
* GATK binary VCF record
*
* @author Your Name
* @since Date created
*/
public class GCF {
private final static int RECORD_TERMINATOR = 123456789;
private int chromOffset;
private int start, stop;
private String id;
private List<Allele> alleleMap;
private int alleleOffsets[];
private float qual;
private byte refPad;
private String info;
private int filterOffset;
private List<GCFGenotype> genotypes = Collections.emptyList();
public GCF(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc, boolean skipGenotypes) {
chromOffset = GCFHeaderBuilder.encodeString(vc.getChr());
start = vc.getStart();
stop = vc.getEnd();
refPad = vc.hasReferenceBaseForIndel() ? vc.getReferenceBaseForIndel() : 0;
id = vc.getID();
// encode alleles
alleleMap = new ArrayList<Allele>(vc.getNAlleles());
alleleOffsets = new int[vc.getNAlleles()];
alleleMap.add(vc.getReference());
alleleOffsets[0] = GCFHeaderBuilder.encodeAllele(vc.getReference());
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
alleleMap.add(vc.getAlternateAllele(i));
alleleOffsets[i+1] = GCFHeaderBuilder.encodeAllele(vc.getAlternateAllele(i));
}
qual = (float)vc.getNegLog10PError(); //qualToByte(vc.getPhredScaledQual());
info = infoFieldString(vc, GCFHeaderBuilder);
filterOffset = GCFHeaderBuilder.encodeString(StandardVCFWriter.getFilterString(vc));
if ( ! skipGenotypes ) {
genotypes = encodeGenotypes(GCFHeaderBuilder, vc);
}
}
public GCF(DataInputStream inputStream, boolean skipGenotypes) throws IOException, EOFException {
chromOffset = inputStream.readInt();
// have we reached the footer?
if ( chromOffset == GCFHeader.FOOTER_START_MARKER )
throw new EOFException();
start = inputStream.readInt();
stop = inputStream.readInt();
id = inputStream.readUTF();
refPad = inputStream.readByte();
alleleOffsets = readIntArray(inputStream);
qual = inputStream.readFloat();
info = inputStream.readUTF();
filterOffset = inputStream.readInt();
int nGenotypes = inputStream.readInt();
int sizeOfGenotypes = inputStream.readInt();
if ( skipGenotypes ) {
genotypes = Collections.emptyList();
inputStream.skipBytes(sizeOfGenotypes);
} else {
genotypes = new ArrayList<GCFGenotype>(nGenotypes);
for ( int i = 0; i < nGenotypes; i++ )
genotypes.add(new GCFGenotype(this, inputStream));
}
int recordDone = inputStream.readInt();
if ( recordDone != RECORD_TERMINATOR )
throw new UserException.MalformedFile("Record not terminated by RECORD_TERMINATOR key");
}
public int write(DataOutputStream outputStream) throws IOException {
int startSize = outputStream.size();
outputStream.writeInt(chromOffset);
outputStream.writeInt(start);
outputStream.writeInt(stop);
outputStream.writeUTF(id);
outputStream.writeByte(refPad);
writeIntArray(alleleOffsets, outputStream, true);
outputStream.writeFloat(qual);
outputStream.writeUTF(info);
outputStream.writeInt(filterOffset);
int nGenotypes = genotypes.size();
int expectedSizeOfGenotypes = nGenotypes == 0 ? 0 : genotypes.get(0).sizeInBytes() * nGenotypes;
outputStream.writeInt(nGenotypes);
outputStream.writeInt(expectedSizeOfGenotypes);
int obsSizeOfGenotypes = 0;
for ( GCFGenotype g : genotypes )
obsSizeOfGenotypes += g.write(outputStream);
if ( obsSizeOfGenotypes != expectedSizeOfGenotypes )
throw new RuntimeException("Expect and observed genotype sizes disagree! expect = " + expectedSizeOfGenotypes + " obs =" + obsSizeOfGenotypes);
outputStream.writeInt(RECORD_TERMINATOR);
return outputStream.size() - startSize;
}
public VariantContext decode(final String source, final GCFHeader header) {
final String contig = header.getString(chromOffset);
alleleMap = header.getAlleles(alleleOffsets);
double negLog10PError = qual; // QualityUtils.qualToErrorProb(qual);
Set<String> filters = header.getFilters(filterOffset);
Map<String, Object> attributes = new HashMap<String, Object>();
attributes.put("INFO", info);
Byte refPadByte = refPad == 0 ? null : refPad;
Map<String, Genotype> genotypes = decodeGenotypes(header);
return new VariantContext(source, contig, start, stop, alleleMap, genotypes, negLog10PError, filters, attributes, refPadByte);
}
private Map<String, Genotype> decodeGenotypes(final GCFHeader header) {
if ( genotypes.isEmpty() )
return VariantContext.NO_GENOTYPES;
else {
Map<String, Genotype> map = new TreeMap<String, Genotype>();
for ( int i = 0; i < genotypes.size(); i++ ) {
final String sampleName = header.getSample(i);
final Genotype g = genotypes.get(i).decode(sampleName, header, this, alleleMap);
map.put(sampleName, g);
}
return map;
}
}
private List<GCFGenotype> encodeGenotypes(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc) {
int nGenotypes = vc.getNSamples();
if ( nGenotypes > 0 ) {
List<GCFGenotype> genotypes = new ArrayList<GCFGenotype>(nGenotypes);
for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null);
for ( Genotype g : vc.getGenotypes().values() ) {
int i = GCFHeaderBuilder.encodeSample(g.getSampleName());
genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g));
}
return genotypes;
} else {
return Collections.emptyList();
}
}
public int getNAlleles() { return alleleOffsets.length; }
private final String infoFieldString(VariantContext vc, final GCFHeaderBuilder GCFHeaderBuilder) {
StringBuilder s = new StringBuilder();
boolean first = true;
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
String key = field.getKey();
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
continue;
int stringIndex = GCFHeaderBuilder.encodeString(key);
String outputValue = StandardVCFWriter.formatVCFField(field.getValue());
if ( outputValue != null ) {
if ( ! first ) s.append(";");
s.append(stringIndex).append("=").append(outputValue);
first = false;
}
}
return s.toString();
}
protected final static int BUFFER_SIZE = 1048576; // 2**20
public static DataInputStream createDataInputStream(final InputStream stream) {
return new DataInputStream(new BufferedInputStream(stream, BUFFER_SIZE));
}
public static FileInputStream createFileInputStream(final File file) throws FileNotFoundException {
return new FileInputStream(file);
}
protected final static int[] readIntArray(final DataInputStream inputStream) throws IOException {
return readIntArray(inputStream, inputStream.readInt());
}
protected final static int[] readIntArray(final DataInputStream inputStream, int size) throws IOException {
int[] array = new int[size];
for ( int i = 0; i < array.length; i++ )
array[i] = inputStream.readInt();
return array;
}
protected final static void writeIntArray(int[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException {
if ( writeSize ) outputStream.writeInt(array.length);
for ( int i : array )
outputStream.writeInt(i);
}
protected final static byte[] readByteArray(final DataInputStream inputStream) throws IOException {
return readByteArray(inputStream, inputStream.readInt());
}
protected final static byte[] readByteArray(final DataInputStream inputStream, int size) throws IOException {
byte[] array = new byte[size];
for ( int i = 0; i < array.length; i++ )
array[i] = inputStream.readByte();
return array;
}
protected final static void writeByteArray(byte[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException {
if ( writeSize ) outputStream.writeInt(array.length);
for ( byte i : array )
outputStream.writeByte(i);
}
protected final static byte qualToByte(double phredScaledQual) {
return (byte)Math.round(Math.min(phredScaledQual, 255));
}
}
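
A hedged round-trip sketch (the file name, variant source, and process() helper are illustrative assumptions; in practice GCFWriter below manages the header and footer around these records):

final GCFHeaderBuilder builder = new GCFHeaderBuilder();
final DataOutputStream out = new DataOutputStream(new FileOutputStream("records.gcf"));
for ( final VariantContext vc : variants )
    new GCF(builder, vc, false).write(out);  // every record ends with RECORD_TERMINATOR
out.close();

// Reading: the GCF(DataInputStream, ...) constructor throws EOFException when it
// sees FOOTER_START_MARKER where the next chrom offset should be.
final DataInputStream in = GCF.createDataInputStream(GCF.createFileInputStream(new File("records.gcf")));
try {
    while ( true )
        process(new GCF(in, false).decode("records.gcf", gcfHeader));  // gcfHeader read from the footer
} catch ( EOFException e ) {
    // footer reached; done
}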

View File

@ -0,0 +1,147 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.gcf;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.*;
/**
* GATK binary VCF record
*
* @author Your Name
* @since Date created
*/
public class GCFGenotype {
private byte gq;
private int gt;
private int dp;
private int ad[];
private byte[] pl;
// todo -- what to do about phasing? Perhaps we shouldn't support it
// todo -- is the FL field generic or just a flag? Should we even support per sample filtering?
public GCFGenotype(final GCFHeaderBuilder GCFHeaderBuilder, final List<Allele> allAlleles, Genotype genotype) {
gq = GCF.qualToByte(genotype.getPhredScaledQual());
gt = encodeAlleles(genotype.getAlleles(), allAlleles);
dp = genotype.getAttributeAsInt("DP", 0);
int nAlleles = allAlleles.size();
ad = new int[nAlleles];
int npls = nAllelesToNPls(nAlleles);
pl = new byte[npls];
}
private int nAllelesToNPls( int nAlleles ) {
return nAlleles*(nAlleles+1) / 2;
}
public GCFGenotype(GCF GCF, DataInputStream inputStream) throws IOException {
int gqInt = inputStream.readUnsignedByte();
gq = (byte)gqInt;
gt = inputStream.readInt();
dp = inputStream.readInt();
ad = GCF.readIntArray(inputStream, GCF.getNAlleles());
pl = GCF.readByteArray(inputStream, nAllelesToNPls(GCF.getNAlleles()));
}
// 2 alleles => 1 + 8 + 8 + 3 => 20
protected int sizeInBytes() {
return 1 // gq
+ 4 * 2 // gt + dp
+ 4 * ad.length // ad
+ 1 * pl.length; // pl
}
public Genotype decode(final String sampleName, final GCFHeader header, GCF GCF, List<Allele> alleleIndex) {
final List<Allele> alleles = decodeAlleles(gt, alleleIndex);
final double negLog10PError = gq / 10.0;
final Set<String> filters = Collections.emptySet();
final Map<String, Object> attributes = new HashMap<String, Object>();
attributes.put("DP", dp);
attributes.put("AD", ad);
attributes.put("PL", pl);
return new Genotype(sampleName, alleles, negLog10PError, filters, attributes, false);
}
private static int encodeAlleles(List<Allele> gtList, List<Allele> allAlleles) {
final int nAlleles = gtList.size();
if ( nAlleles > 4 )
throw new IllegalArgumentException("encodeAlleles doesn't support more than 4 alt alleles, but I saw " + gtList);
int gtInt = 0;
for ( int i = 0; i < nAlleles ; i++ ) {
final int bitOffset = i * 8;
final int allelei = getAlleleIndex(gtList.get(i), allAlleles);
final int gti = (allelei + 1) << bitOffset;
gtInt = gtInt | gti;
}
return gtInt;
}
private static int getAlleleIndex(Allele q, List<Allele> allAlleles) {
if ( q.isNoCall() )
return 254;
for ( int i = 0; i < allAlleles.size(); i++ )
if ( q.equals(allAlleles.get(i)) )
return i;
throw new IllegalStateException("getAlleleIndex passed allele not in map! allele " + q + " allAlleles " + allAlleles);
}
private static List<Allele> decodeAlleles(int gtInt, List<Allele> alleleIndex) {
List<Allele> alleles = new ArrayList<Allele>(4);
for ( int i = 0; i < 32; i += 8 ) {
final int gi = (gtInt & (0x000000FF << i)) >> i;
if ( gi != 0 ) {
final int allelei = gi - 1;
alleles.add( allelei == 254 ? Allele.NO_CALL : alleleIndex.get(allelei) );
} else {
break;
}
}
return alleles;
}
public int write(DataOutputStream outputStream) throws IOException {
int startSize = outputStream.size();
outputStream.writeByte(gq);
outputStream.writeInt(gt);
outputStream.writeInt(dp);
GCF.writeIntArray(ad, outputStream, false);
GCF.writeByteArray(pl, outputStream, false);
return outputStream.size() - startSize;
}
}
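
A worked example of the genotype bit-packing above (allele indices assumed REF=0, first ALT=1): each genotype allele occupies one byte holding index+1, with a zero byte terminating the list, so ploidies up to 4 fit in the single int.

int gt = (0 + 1) | ((1 + 1) << 8);           // diploid 0/1 -> 0x0201 == 513
int first  = ((gt & 0x000000FF)      ) - 1;  // 0 -> the REF allele
int second = ((gt & 0x0000FF00) >> 8 ) - 1;  // 1 -> the first ALT allele
// a no-call stores 254 + 1 == 255 in its byte, matching getAlleleIndex/decodeAlleles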

View File

@ -0,0 +1,205 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.gcf;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.*;
import java.util.*;
/**
 * Header for the GATK binary VCF (GCF) format: holds the interned allele,
 * string, and sample tables that records reference by integer offset. The
 * tables are written as a footer whose file position is stored at a fixed
 * offset near the start of the file.
 *
 * @author Your Name
 * @since Date created
 */
public class GCFHeader {
final protected static Logger logger = Logger.getLogger(GCFHeader.class);
public final static int GCF_VERSION = 1;
public final static byte[] GCF_FILE_START_MARKER = "GCF\1".getBytes();
public final static int FOOTER_START_MARKER = -1;
public final static long HEADER_FORWARD_REFERENCE_OFFSET = GCF_FILE_START_MARKER.length + 4; // for the version
final int version;
long footerPosition;
final List<Allele> alleles;
final List<String> strings;
final List<String> samples;
final List<Set<String>> filters;
public GCFHeader(final Map<Allele, Integer> allelesIn, final Map<String, Integer> stringIn, final Map<String, Integer> samplesIn) {
version = GCF_VERSION;
footerPosition = 0;
this.alleles = linearize(allelesIn);
this.strings = linearize(stringIn);
this.samples = linearize(samplesIn);
this.filters = null; // not used with this constructor
}
public GCFHeader(FileInputStream fileInputStream) throws IOException {
DataInputStream inputStream = new DataInputStream(fileInputStream);
byte[] headerTest = new byte[GCF_FILE_START_MARKER.length];
inputStream.readFully(headerTest);
if ( ! Arrays.equals(headerTest, GCF_FILE_START_MARKER) ) {
throw new UserException("Could not read GCF file. GCF_FILE_START_MARKER missing. Saw " + new String(headerTest));
} else {
version = inputStream.readInt();
logger.info("Read GCF version " + version);
footerPosition = inputStream.readLong();
logger.info("Read footer position of " + footerPosition);
long lastPos = fileInputStream.getChannel().position();
logger.info(" Last position is " + lastPos);
// seek to the footer
fileInputStream.getChannel().position(footerPosition);
if ( inputStream.readInt() != FOOTER_START_MARKER )
throw new UserException.MalformedFile("Malformed GCF file: couldn't find the footer marker");
alleles = stringsToAlleles(readStrings(inputStream));
strings = readStrings(inputStream);
samples = readStrings(inputStream);
logger.info(String.format("Allele map of %d elements", alleles.size()));
logger.info(String.format("String map of %d elements", strings.size()));
logger.info(String.format("Sample map of %d elements", samples.size()));
filters = initializeFilterCache();
fileInputStream.getChannel().position(lastPos);
}
}
public static int writeHeader(final DataOutputStream outputStream) throws IOException {
int startBytes = outputStream.size();
outputStream.write(GCF_FILE_START_MARKER);
outputStream.writeInt(GCF_VERSION);
outputStream.writeLong(0); // placeholder for footerPosition, patched via RandomAccessFile in GCFWriter.close()
return outputStream.size() - startBytes;
}
public int writeFooter(final DataOutputStream outputStream) throws IOException {
int startBytes = outputStream.size();
outputStream.writeInt(FOOTER_START_MARKER); // has to be the same as chrom encoding
write(outputStream, allelesToStrings(alleles));
write(outputStream, strings);
write(outputStream, samples);
return outputStream.size() - startBytes;
}
private void write(DataOutputStream outputStream, List<String> l) throws IOException {
outputStream.writeInt(l.size());
for ( String elt : l ) outputStream.writeUTF(elt);
}
private List<String> allelesToStrings(List<Allele> alleles) {
List<String> strings = new ArrayList<String>(alleles.size());
for ( Allele allele : alleles ) strings.add(allele.toString());
return strings;
}
private List<Set<String>> initializeFilterCache() {
// required to allow offset -> set lookup
List<Set<String>> l = new ArrayList<Set<String>>(strings.size());
for ( int i = 0; i < strings.size(); i++ ) l.add(null);
return l;
}
private static List<Allele> stringsToAlleles(final List<String> strings) {
final List<Allele> alleles = new ArrayList<Allele>(strings.size());
for ( String string : strings ) {
boolean isRef = string.endsWith("*");
if ( isRef ) string = string.substring(0, string.length() - 1);
alleles.add(Allele.create(string, isRef));
}
return alleles;
}
private static List<String> readStrings(final DataInputStream inputStream) throws IOException {
final int nStrings = inputStream.readInt();
final List<String> strings = new ArrayList<String>(nStrings);
for ( int i = 0; i < nStrings; i++ ) {
strings.add(inputStream.readUTF());
}
return strings;
}
private static <T> List<T> linearize(final Map<T, Integer> map) {
final ArrayList<T> l = new ArrayList<T>(map.size());
for ( int i = 0; i < map.size(); i++ ) l.add(null);
for ( final Map.Entry<T, Integer> elt : map.entrySet() )
l.set(elt.getValue(), elt.getKey());
return l;
}
public String getSample(final int offset) { return samples.get(offset); }
public String getString(final int offset) { return strings.get(offset); }
public Allele getAllele(final int offset) { return alleles.get(offset); }
public List<Allele> getAlleles(final int[] offsets) {
final List<Allele> alleles = new ArrayList<Allele>(offsets.length);
for ( int i : offsets ) alleles.add(getAllele(i));
return alleles;
}
public Set<String> getFilters(final int offset) {
Set<String> cached = filters.get(offset);
if ( cached != null )
return cached;
else {
final String filterString = getString(offset);
if ( filterString.equals(VCFConstants.UNFILTERED) )
return null; // UNFILTERED records are represented by null
else {
Set<String> set = VCFCodec.parseFilters(null, -1, filterString);
filters.set(offset, set); // remember the result
return set;
}
}
}
}
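
For orientation, the on-disk layout implied by writeHeader, writeFooter, and the constructor's footer seek:

// byte  0 : "GCF\1" file start marker (4 bytes)
// byte  4 : int version
// byte  8 : long footerPosition -- written as 0 here, patched in GCFWriter.close()
// byte 16+: GCF records, each terminated by RECORD_TERMINATOR
// footerPosition: FOOTER_START_MARKER, then the allele, string, and sample tables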

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.gcf;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.HashMap;
import java.util.Map;
/**
 * Incrementally builds the interned allele, string, and sample tables for a
 * GCF file, assigning each distinct value a stable integer offset.
 *
 * @author Your Name
 * @since Date created
 */
public class GCFHeaderBuilder {
Map<Allele, Integer> alleles = new HashMap<Allele, Integer>();
Map<String, Integer> strings = new HashMap<String, Integer>();
Map<String, Integer> samples = new HashMap<String, Integer>();
public GCFHeader createHeader() {
return new GCFHeader(alleles, strings, samples);
}
public int encodeString(final String chr) { return encode(strings, chr); }
public int encodeAllele(final Allele allele) { return encode(alleles, allele); }
public int encodeSample(final String sampleName) { return encode(samples, sampleName); }
private <T> int encode(Map<T, Integer> map, T key) {
Integer v = map.get(key);
if ( v == null ) {
v = map.size();
map.put(key, v);
}
return v;
}
}
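
A small illustration of the interning behavior (values are arbitrary):

final GCFHeaderBuilder b = new GCFHeaderBuilder();
int chr   = b.encodeString("chr20");  // 0 -- first distinct string
int pass  = b.encodeString("PASS");   // 1
int again = b.encodeString("chr20");  // 0 -- already interned, same offset
final GCFHeader h = b.createHeader(); // linearize() inverts the maps into offset-ordered lists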

View File

@ -0,0 +1,123 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.gcf;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.codecs.vcf.IndexingVCFWriter;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
/**
* GCFWriter implementing the VCFWriter interface
* @author Your Name
* @since Date created
*/
public class GCFWriter extends IndexingVCFWriter {
final boolean skipGenotypes;
final FileOutputStream fileOutputStream;
final DataOutputStream dataOutputStream;
final GCFHeaderBuilder gcfHeaderBuilder;
int nbytes = 0;
VCFHeader header = null;
File location;
// --------------------------------------------------------------------------------
//
// Constructors
//
// --------------------------------------------------------------------------------
public GCFWriter(final File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
super(writerName(location, null), location, null, refDict, enableOnTheFlyIndexing);
this.location = location;
this.skipGenotypes = doNotWriteGenotypes;
// write the output
try {
fileOutputStream = new FileOutputStream(location);
dataOutputStream = createDataOutputStream(fileOutputStream);
gcfHeaderBuilder = new GCFHeaderBuilder();
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotCreateOutputFile(location, e);
}
}
// --------------------------------------------------------------------------------
//
// VCFWriter interface functions
//
// --------------------------------------------------------------------------------
@Override
public void writeHeader(VCFHeader header) {
this.header = header;
try {
nbytes += GCFHeader.writeHeader(dataOutputStream);
} catch ( IOException e ) {
throw new UserException.CouldNotCreateOutputFile(getStreamName(), "Couldn't write header", e);
}
}
@Override
public void add(VariantContext vc) {
super.add(vc);
GCF gcf = new GCF(gcfHeaderBuilder, vc, skipGenotypes);
try {
nbytes += gcf.write(dataOutputStream);
} catch ( IOException e ) {
throw new UserException.CouldNotCreateOutputFile(getStreamName(), "Failed to add gcf record " + gcf + " to stream " + getStreamName(), e);
}
}
@Override
public void close() {
// todo -- write out VCF header lines
GCFHeader gcfHeader = gcfHeaderBuilder.createHeader();
try {
long headerPosition = nbytes;
nbytes += gcfHeader.writeFooter(dataOutputStream);
dataOutputStream.close();
//System.out.println("Writing forward reference to " + headerPosition);
RandomAccessFile raFile = new RandomAccessFile(location, "rw");
raFile.seek(GCFHeader.HEADER_FORWARD_REFERENCE_OFFSET);
raFile.writeLong(headerPosition);
raFile.close();
} catch ( IOException e ) {
throw new ReviewedStingException("Failed to close GCFWriter " + getStreamName(), e);
}
super.close();
}
private static final DataOutputStream createDataOutputStream(final OutputStream stream) {
return new DataOutputStream(new BufferedOutputStream(stream, GCF.BUFFER_SIZE));
}
}

View File

@ -118,31 +118,40 @@ public class ReadUtils {
/**
* This enum represents all the different ways in which a read can overlap an interval.
*
* NO_OVERLAP:
* NO_OVERLAP_CONTIG:
* read and interval are in different contigs.
*
* NO_OVERLAP_LEFT:
* the read does not overlap the interval.
*
*                           |----------------| (interval)
*   <---------------->                         (read)
*
* NO_OVERLAP_RIGHT:
* the read does not overlap the interval.
*
*   |----------------|                         (interval)
*                           <----------------> (read)
*
* LEFT_OVERLAP:
* OVERLAP_LEFT:
* the read starts before the beginning of the interval but ends inside of it
*
*           |----------------| (interval)
*   <---------------->         (read)
*
* RIGHT_OVERLAP:
* OVERLAP_RIGHT:
* the read starts inside the interval but ends outside of it
*
*   |----------------|         (interval)
*           <----------------> (read)
*
* FULL_OVERLAP:
* OVERLAP_LEFT_AND_RIGHT:
* the read starts before the interval and ends after the interval
*
*     |-----------|       (interval)
*   <-------------------> (read)
*
* CONTAINED:
* OVERLAP_CONTAINED:
* the read starts and ends inside the interval
*
*   |----------------| (interval)
@ -658,7 +667,7 @@ public class ReadUtils {
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
}
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
int start = read.getUnclippedStart();
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
@ -670,9 +679,13 @@ public class ReadUtils {
return start;
}
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
int stop = read.getUnclippedStart();
if (readIsEntirelyInsertion(read))
return stop;
int shift = 0;
CigarOperator lastOperator = null;
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
@ -686,6 +699,14 @@ public class ReadUtils {
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
private static boolean readIsEntirelyInsertion(SAMRecord read) {
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() != CigarOperator.INSERTION)
return false;
}
return true;
}
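
A worked illustration of why the contracts above were relaxed (the cigar is illustrative): a read whose cigar is, say, 5I consumes no reference bases, so the accumulating stop/shift arithmetic would land below read.getUnclippedStart() and violate the old @Ensures bound; readIsEntirelyInsertion short-circuits that case by returning the unclipped start directly.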
/**
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
* the alignment start of the read.

View File

@ -293,17 +293,8 @@ public class Genotype {
return commonInfo.getAttribute(key, defaultValue);
}
public String getAttributeAsString(String key) { return commonInfo.getAttributeAsString(key); }
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
public int getAttributeAsInt(String key) { return commonInfo.getAttributeAsInt(key); }
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
public double getAttributeAsDouble(String key) { return commonInfo.getAttributeAsDouble(key); }
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
public boolean getAttributeAsBoolean(String key) { return commonInfo.getAttributeAsBoolean(key); }
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
public Integer getAttributeAsIntegerNoException(String key) { return commonInfo.getAttributeAsIntegerNoException(key); }
public Double getAttributeAsDoubleNoException(String key) { return commonInfo.getAttributeAsDoubleNoException(key); }
public String getAttributeAsStringNoException(String key) { return commonInfo.getAttributeAsStringNoException(key); }
public Boolean getAttributeAsBooleanNoException(String key) { return commonInfo.getAttributeAsBooleanNoException(key); }
}

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.utils.variantcontext;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import java.util.*;
@ -204,27 +206,40 @@ public final class InferredGeneticContext {
return defaultValue;
}
// public AttributedObject getAttributes(Collection<Object> keys) {
// AttributedObject selected = new AttributedObject();
//
// for ( Object key : keys )
// selected.putAttribute(key, this.getAttribute(key));
//
// return selected;
// }
public String getAttributeAsString(String key, String defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof String ) return (String)x;
return String.valueOf(x); // stringifies whatever the underlying object is
}
public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); } // **NOTE**: will turn a null Object into the String "null"
public int getAttributeAsInt(String key) { Object x = getAttribute(key); return x instanceof Integer ? (Integer)x : Integer.valueOf((String)x); }
public double getAttributeAsDouble(String key) { Object x = getAttribute(key); return x instanceof Double ? (Double)x : Double.valueOf((String)x); }
public boolean getAttributeAsBoolean(String key) { Object x = getAttribute(key); return x instanceof Boolean ? (Boolean)x : Boolean.valueOf((String)x); }
public int getAttributeAsInt(String key, int defaultValue) {
Object x = getAttribute(key);
if ( x == null || VCFConstants.MISSING_VALUE_v4.equals(x) ) return defaultValue;
if ( x instanceof Integer ) return (Integer)x;
return Integer.valueOf((String)x); // throws an exception if this isn't a string
}
public String getAttributeAsString(String key, String defaultValue) { return (String)getAttribute(key, defaultValue); }
public int getAttributeAsInt(String key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); }
public double getAttributeAsDouble(String key, double defaultValue) { return (Double)getAttribute(key, defaultValue); }
public boolean getAttributeAsBoolean(String key, boolean defaultValue){ return (Boolean)getAttribute(key, defaultValue); }
public double getAttributeAsDouble(String key, double defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof Double ) return (Double)x;
return Double.valueOf((String)x); // throws an exception if this isn't a string
}
public Integer getAttributeAsIntegerNoException(String key) { try {return getAttributeAsInt(key);} catch (Exception e) {return null;} }
public Double getAttributeAsDoubleNoException(String key) { try {return getAttributeAsDouble(key);} catch (Exception e) {return null;} }
public String getAttributeAsStringNoException(String key) { if (getAttribute(key) == null) return null; return getAttributeAsString(key); }
public Boolean getAttributeAsBooleanNoException(String key) { try {return getAttributeAsBoolean(key);} catch (Exception e) {return null;} }
public boolean getAttributeAsBoolean(String key, boolean defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof Boolean ) return (Boolean)x;
return Boolean.valueOf((String)x); // throws an exception if this isn't a string
}
// public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); } // **NOTE**: will turn a null Object into the String "null"
// public int getAttributeAsInt(String key) { Object x = getAttribute(key); return x instanceof Integer ? (Integer)x : Integer.valueOf((String)x); }
// public double getAttributeAsDouble(String key) { Object x = getAttribute(key); return x instanceof Double ? (Double)x : Double.valueOf((String)x); }
// public boolean getAttributeAsBoolean(String key) { Object x = getAttribute(key); return x instanceof Boolean ? (Boolean)x : Boolean.valueOf((String)x); }
// public Integer getAttributeAsIntegerNoException(String key) { try {return getAttributeAsInt(key);} catch (Exception e) {return null;} }
// public Double getAttributeAsDoubleNoException(String key) { try {return getAttributeAsDouble(key);} catch (Exception e) {return null;} }
// public String getAttributeAsStringNoException(String key) { if (getAttribute(key) == null) return null; return getAttributeAsString(key); }
// public Boolean getAttributeAsBooleanNoException(String key) { try {return getAttributeAsBoolean(key);} catch (Exception e) {return null;} }
}
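
A quick sketch of the new coercing behavior (the keys and stored values are assumed):

// VCF parsing stores attribute values as Strings; the getters now coerce them.
int dp    = getAttributeAsInt("DP", 0);      // "42" -> 42; missing key -> 0
double af = getAttributeAsDouble("AF", 0.0); // "0.05" -> 0.05
// MISSING_VALUE_v4 (".") also falls back to the default in getAttributeAsInt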

View File

@ -666,21 +666,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
return commonInfo.getAttribute(key, defaultValue);
}
public String getAttributeAsString(String key) { return commonInfo.getAttributeAsString(key); }
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
public int getAttributeAsInt(String key) { return commonInfo.getAttributeAsInt(key); }
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
public double getAttributeAsDouble(String key) { return commonInfo.getAttributeAsDouble(key); }
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
public boolean getAttributeAsBoolean(String key) { return commonInfo.getAttributeAsBoolean(key); }
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
public Integer getAttributeAsIntegerNoException(String key) { return commonInfo.getAttributeAsIntegerNoException(key); }
public Double getAttributeAsDoubleNoException(String key) { return commonInfo.getAttributeAsDoubleNoException(key); }
public String getAttributeAsStringNoException(String key) { return commonInfo.getAttributeAsStringNoException(key); }
public Boolean getAttributeAsBooleanNoException(String key) { return commonInfo.getAttributeAsBooleanNoException(key); }
// ---------------------------------------------------------------------------------------------------------
//
// Working with alleles

View File

@ -316,19 +316,22 @@ public class VariantContextUtils {
return pruneVariantContext(vc, null);
}
public static VariantContext pruneVariantContext(VariantContext vc, Collection<String> keysToPreserve ) {
MutableVariantContext mvc = new MutableVariantContext(vc);
public static VariantContext pruneVariantContext(final VariantContext vc, final Collection<String> keysToPreserve ) {
final MutableVariantContext mvc = new MutableVariantContext(vc);
if ( keysToPreserve == null || keysToPreserve.size() == 0 )
mvc.clearAttributes();
else {
Map<String, Object> d = mvc.getAttributes();
final Map<String, Object> d = mvc.getAttributes();
mvc.clearAttributes();
for ( String key : keysToPreserve )
if ( d.containsKey(key) )
mvc.putAttribute(key, d.get(key));
}
// this must be done as the ID is stored in the attributes field
if ( vc.hasID() ) mvc.setID(vc.getID());
Collection<Genotype> gs = mvc.getGenotypes().values();
mvc.clearGenotypes();
for ( Genotype g : gs ) {
@ -443,34 +446,6 @@ public class VariantContextUtils {
throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next()));
}
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
return simpleMerge(genomeLocParser, unsortedVCs, null, FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GenotypeMergeType.UNSORTED, false, false, refBase);
}
/**
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
* the sample name
*
* @param genomeLocParser loc parser
* @param unsortedVCs collection of unsorted VCs
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
* @param filteredRecordMergeType merge type for filtered records
* @param genotypeMergeOptions merge option for genotypes
* @param annotateOrigin should we annotate the set it came from?
* @param printMessages should we print messages?
* @param inputRefBase the ref base
* @return new VariantContext
*/
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false);
}
/**
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
@ -486,12 +461,18 @@ public class VariantContextUtils {
* @param setKey the key name of the set
* @param filteredAreUncalled are filtered records uncalled?
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
* @return new VariantContext
* @return new VariantContext representing the merge of unsortedVCs
*/
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages, String setKey,
boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
final Collection<VariantContext> unsortedVCs,
final List<String> priorityListOfVCs,
final FilteredRecordMergeType filteredRecordMergeType,
final GenotypeMergeType genotypeMergeOptions,
final boolean annotateOrigin,
final boolean printMessages,
final String setKey,
final boolean filteredAreUncalled,
final boolean mergeInfoWithMaxAC ) {
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
return null;
@ -514,26 +495,28 @@ public class VariantContextUtils {
return null;
// establish the baseline info from the first VC
VariantContext first = VCs.get(0);
String name = first.getSource();
GenomeLoc loc = getLocation(genomeLocParser,first);
final VariantContext first = VCs.get(0);
final String name = first.getSource();
final Allele refAllele = determineReferenceAllele(VCs);
Set<Allele> alleles = new TreeSet<Allele>();
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
double negLog10PError = -1;
Set<String> filters = new TreeSet<String>();
Map<String, Object> attributes = new TreeMap<String, Object>();
Set<String> inconsistentAttributes = new HashSet<String>();
String rsID = null;
final Set<Allele> alleles = new TreeSet<Allele>();
final Set<String> filters = new TreeSet<String>();
final Map<String, Object> attributes = new TreeMap<String, Object>();
final Set<String> inconsistentAttributes = new HashSet<String>();
final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id
GenomeLoc loc = getLocation(genomeLocParser,first);
int depth = 0;
int maxAC = -1;
Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
double negLog10PError = -1;
VariantContext vcWithMaxAC = null;
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
// counting the number of filtered and variant VCs
int nFiltered = 0, nVariant = 0;
int nFiltered = 0;
Allele refAllele = determineReferenceAllele(VCs);
boolean remapped = false;
// cycle through and add info from the other VCs, making sure the loc/reference matches
@ -546,7 +529,7 @@ public class VariantContextUtils {
loc = getLocation(genomeLocParser,vc); // get the longest location
nFiltered += vc.isFiltered() ? 1 : 0;
nVariant += vc.isVariant() ? 1 : 0;
if ( vc.isVariant() ) variantSources.add(vc.getSource());
AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
remapped = remapped || alleleMapping.needsRemapping();
@ -565,11 +548,10 @@ public class VariantContextUtils {
// special case DP (add it up) and ID (just preserve it)
//
if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY));
if (rsID == null && vc.hasID())
rsID = vc.getID();
depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
if ( vc.hasID() && ! vc.getID().equals(VCFConstants.EMPTY_ID_FIELD) ) rsIDs.add(vc.getID());
if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY);
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
// let's see if the string contains a ',' separator
if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
@ -627,17 +609,16 @@ public class VariantContextUtils {
if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
filters.clear();
// we care about where the call came from
if ( annotateOrigin ) {
if ( annotateOrigin ) { // we care about where the call came from
String setValue;
if ( nFiltered == 0 && nVariant == priorityListOfVCs.size() ) // everything was variant and unfiltered
if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // everything was variant and unfiltered
setValue = "Intersection";
else if ( nFiltered == VCs.size() ) // everything was filtered out
setValue = "FilteredInAll";
else if ( nVariant == 0 ) // everyone was reference
else if ( variantSources.isEmpty() ) // everyone was reference
setValue = "ReferenceInAll";
else { // we are filtered in some subset
List<String> s = new ArrayList<String>();
else {
LinkedHashSet<String> s = new LinkedHashSet<String>();
for ( VariantContext vc : VCs )
if ( vc.isVariant() )
s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() );
@ -652,8 +633,10 @@ public class VariantContextUtils {
if ( depth > 0 )
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
if ( rsID != null )
attributes.put(VariantContext.ID_KEY, rsID);
if ( ! rsIDs.isEmpty() ) {
attributes.put(VariantContext.ID_KEY, Utils.join(",", rsIDs));
}
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? attributesWithMaxAC : attributes) );
// Trim the padded bases of all alleles if necessary
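With the convenience overloads deleted above, callers now spell out the full argument list. A minimal sketch (not part of the commit) reproducing the defaults the two removed overloads used to supply, assuming genomeLocParser and unsortedVCs are already in scope:

    final VariantContext merged = VariantContextUtils.simpleMerge(
            genomeLocParser,
            unsortedVCs,
            null,                 // no priority list
            VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
            VariantContextUtils.GenotypeMergeType.UNSORTED,
            false,                // annotateOrigin
            false,                // printMessages
            "set",                // setKey
            false,                // filteredAreUncalled
            false );              // mergeInfoWithMaxAC

As the hunks above show, the merged record sums DP across the inputs and joins the distinct rsIDs it collected with "," before storing them under ID_KEY.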
@ -1159,9 +1142,7 @@ public class VariantContextUtils {
for (String orAttrib : MERGE_OR_ATTRIBS) {
boolean attribVal = false;
for (VariantContext vc : vcList) {
Boolean val = vc.getAttributeAsBooleanNoException(orAttrib);
if (val != null)
attribVal = (attribVal || val);
attribVal = vc.getAttributeAsBoolean(orAttrib, false);
if (attribVal) // already true, so no reason to continue:
break;
}
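This commit swaps the nullable getAttributeAs*NoException accessors for defaulted lookups throughout (getAttributeAsInt, getAttributeAsString, and getAttributeAsBoolean all appear above with an explicit fallback). Because the loop breaks as soon as attribVal turns true, the plain assignment is equivalent to the old OR-accumulation. A sketch of the equivalence, assuming a VariantContext vc and an attribute name key (both illustrative):

    // Old style: three-valued lookup, null when the attribute is absent.
    final Boolean maybe = vc.getAttributeAsBooleanNoException(key);
    final boolean oldStyle = (maybe != null) && maybe;

    // New style: the default collapses the absent case into a single call.
    final boolean newStyle = vc.getAttributeAsBoolean(key, false);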
@ -1171,7 +1152,7 @@ public class VariantContextUtils {
// Merge ID fields:
String iDVal = null;
for (VariantContext vc : vcList) {
String val = vc.getAttributeAsStringNoException(VariantContext.ID_KEY);
String val = vc.getAttributeAsString(VariantContext.ID_KEY, null);
if (val != null && !val.equals(VCFConstants.EMPTY_ID_FIELD)) {
if (iDVal == null)
iDVal = val;
@ -1251,8 +1232,10 @@ public class VariantContextUtils {
public PhaseAndQuality(Genotype gt) {
this.isPhased = gt.isPhased();
if (this.isPhased)
this.PQ = gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
if (this.isPhased) {
this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
if ( this.PQ == -1 ) this.PQ = null;
}
}
}
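Where genuinely nullable semantics are still needed, the new code recovers them with a sentinel, as in PhaseAndQuality above. This is safe only because PQ is a phred-scaled quality and can never legitimately be -1. A sketch of the pattern in isolation, assuming a Genotype gt in scope:

    Double pq = null;  // null means "no PQ annotation present"
    final double raw = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
    if ( raw != -1 ) pq = raw;  // the sentinel never collides with a real PQ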

View File

@ -75,7 +75,7 @@ public class WalkerTest extends BaseTest {
Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec());
Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath());
if ( ! indexFromOutputFile.equalsIgnoreTimestamp(dynamicIndex) ) {
if ( ! indexFromOutputFile.equalsIgnoreProperties(dynamicIndex) ) {
Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n",
indexFromOutputFile.getProperties(),
dynamicIndex.getProperties()));

View File

@ -56,6 +56,7 @@ public class FeatureManagerUnitTest extends BaseTest {
private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");
private FeatureManager manager;
private GenomeLocParser genomeLocParser;
@ -109,6 +110,7 @@ public class FeatureManagerUnitTest extends BaseTest {
new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ);
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_BGZIP);
new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null);
return FMTest.getTests(FMTest.class);
}

View File

@ -29,7 +29,6 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.Index;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -45,7 +44,6 @@ import org.testng.annotations.Test;
import java.io.*;
import java.nio.channels.FileChannel;
import java.util.Map;
/**
@ -164,7 +162,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
try {
Index idx = builder.loadIndex(vcfFile, new VCFCodec());
// catch any exception; this call should pass correctly
SAMSequenceDictionary dict = RMDTrackBuilder.getSequenceDictionaryFromProperties(idx);
SAMSequenceDictionary dict = IndexDictionaryUtils.getSequenceDictionaryFromProperties(idx);
} catch (IOException e) {
e.printStackTrace();
Assert.fail("IO exception unexpected" + e.getMessage());

View File

@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
1,
Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9")
Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e")
);
executeTest("Testing SnpEff annotations", spec);
}

View File

@ -96,8 +96,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); }
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); }
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); }
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "c6adeda751cb2a08690dd9202356629f"); }
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "3a08fd5ee18993dfc8882156ccf5d2e9"); }
@Test public void threeWayWithRefs() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -131,4 +131,13 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); }
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
@Test
public void combineDBSNPDuplicateSites() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
1,
Arrays.asList(""));
executeTest("combineDBSNPDuplicateSites:", spec);
}
}

View File

@ -172,6 +172,17 @@ public class BAQUnitTest extends BaseTest {
}
}
@Test(enabled = true)
public void testBAQQualRange() {
BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters
final byte ref = (byte)'A';
final byte alt = (byte)'A';
for ( int i = 0; i <= SAMUtils.MAX_PHRED_SCORE; i++ )
Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range");
}
public void testBAQ(BAQTest test, boolean lookupWithFasta) {
BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters

View File

@ -1,53 +1,43 @@
package org.broadinstitute.sting.utils.codecs.vcf;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.*;
import org.broad.tribble.iterators.CloseableTribbleIterator;
import org.broad.tribble.source.BasicFeatureSource;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
/**
* tests out the various functions in the index factory class
*/
public class IndexFactoryUnitTest {
public class IndexFactoryUnitTest extends BaseTest {
File inputFile = new File("public/testdata/HiSeq.10000.vcf");
File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf");
File outputFileIndex = Tribble.indexFile(outputFile);
/**
* test out scoring the indexes
*/
@Test
public void testScoreIndexes() {
/*// make a list of indexes to score
Map<Class,IndexCreator> creators = new HashMap<Class,IndexCreator>();
// add a linear index with the default bin size
LinearIndexCreator linearNormal = new LinearIndexCreator();
linearNormal.initialize(inputFile, linearNormal.defaultBinSize());
creators.add(LInearIndexlinearNormal);
private SAMSequenceDictionary dict;
// create a tree index with a small index size
IntervalIndexCreator treeSmallBin = new IntervalIndexCreator();
treeSmallBin.initialize(inputFile, Math.max(200,treeSmallBin.defaultBinSize()/10));
creators.add(treeSmallBin);
List<Index> indexes = new ArrayList<Index>();
for (IndexCreator creator : creators)
indexes.add(creator.finalizeIndex(0));
ArrayList<Double> scores = IndexFactory.scoreIndexes(0.5,indexes,100, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
System.err.println("scores are : ");
for (Double score : scores) {
System.err.println(score);
*/
@BeforeTest
public void setup() {
try {
dict = new CachingIndexedFastaSequenceFile(new File(b37KGReference)).getSequenceDictionary();
}
catch(FileNotFoundException ex) {
throw new UserException.CouldNotReadInputFile(b37KGReference,ex);
}
}
//
@ -65,7 +55,7 @@ public class IndexFactoryUnitTest {
BasicFeatureSource<VariantContext> source = new BasicFeatureSource<VariantContext>(inputFile.getAbsolutePath(), indexFromInputFile, new VCFCodec());
int counter = 0;
VCFWriter writer = new StandardVCFWriter(outputFile);
VCFWriter writer = new StandardVCFWriter(outputFile, dict);
writer.writeHeader((VCFHeader)source.getHeader());
CloseableTribbleIterator<VariantContext> it = source.iterator();
while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) {

View File

@ -38,12 +38,13 @@ public class VCFWriterUnitTest extends BaseTest {
private Set<String> additionalColumns = new HashSet<String>();
private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
private GenomeLocParser genomeLocParser;
private IndexedFastaSequenceFile seq;
@BeforeClass
public void beforeTests() {
File referenceFile = new File(hg18Reference);
try {
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
seq = new CachingIndexedFastaSequenceFile(referenceFile);
genomeLocParser = new GenomeLocParser(seq);
}
catch(FileNotFoundException ex) {
@ -55,7 +56,7 @@ public class VCFWriterUnitTest extends BaseTest {
@Test
public void testBasicWriteAndRead() {
VCFHeader header = createFakeHeader(metaData,additionalColumns);
VCFWriter writer = new StandardVCFWriter(fakeVCFFile);
VCFWriter writer = new StandardVCFWriter(fakeVCFFile, seq.getSequenceDictionary());
writer.writeHeader(header);
writer.add(createVC(header));
writer.add(createVC(header));
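Both unit tests now thread a SAMSequenceDictionary into the StandardVCFWriter constructor, presumably so the writer can record contig metadata when indexing on the fly (compare the IndexDictionaryUtils.getSequenceDictionaryFromProperties call earlier in this commit). A minimal construction sketch, assuming a reference FASTA with its .fai/.dict companions (path illustrative; error handling elided, since the constructor can throw FileNotFoundException as the catch blocks above show):

    final IndexedFastaSequenceFile ref =
            new CachingIndexedFastaSequenceFile(new File("/path/to/reference.fasta"));
    final VCFWriter writer = new StandardVCFWriter(outputFile, ref.getSequenceDictionary());
    writer.writeHeader(header);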

View File

@ -131,11 +131,11 @@ class GATKResourcesBundle extends QScript {
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf",
"hapmap_3.3", b37, true, true))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf",
"1000G_indels_for_realignment", b37, true, false))
addResource(new Resource("/humgen/1kg/processing/official_release/phase1/ALL.wgs.VQSR_consensus_biallelic.20101123.indels.sites.vcf",
"1000G_biallelic.indels", b37, true, false))
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf",
"indels_mills_devine", b37, true, true))
"Mills_Devine_2hit.indels", b37, true, true))
//
// example call set for wiki tutorial
@ -300,9 +300,9 @@ class GATKResourcesBundle extends QScript {
bamFile = bamIn
}
class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRod with UNIVERSAL_GATK_ARGS {
class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRODs with UNIVERSAL_GATK_ARGS {
//@Output val vcfIndex: File = swapExt(vcf.getParent, vcf, ".vcf", ".vcf.idx")
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
this.rod :+= vcf
this.reference_sequence = ref
}
@ -313,7 +313,7 @@ class GATKResourcesBundle extends QScript {
}
class MakeDBSNP129(@Input dbsnp: File, @Input ref: File, @Output dbsnp129: File) extends SelectVariants with UNIVERSAL_GATK_ARGS {
this.rodBind :+= RodBind("variant", "VCF", dbsnp)
this.variant = dbsnp
this.select ++= List("\"dbSNPBuildID <= 129\"")
this.reference_sequence = ref
this.out = dbsnp129

View File

@ -98,40 +98,52 @@ class MethodsDevelopmentCallingPipeline extends QScript {
// BUGBUG: We no longer support b36/hg18 because several of the necessary files aren't available aligned to those references
val targetDataSets: Map[String, Target] = Map(
"HiSeq" -> new Target("NA12878.HiSeq", hg18, dbSNP_hg18_129, hapmap_hg18,
"NA12878_wgs_b37" -> new Target("NA12878.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"),
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
"NA12878_wgs_decoy" -> new Target("NA12878.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.NA12878.clean.dedup.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
"NA12878_wgs_hg18" -> new Target("NA12878.HiSeq.WGS.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask",
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam"),
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf"),
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 2.14, 99.0, !lowPass, !exome, 1),
"HiSeq19" -> new Target("NA12878.HiSeq19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"),
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
"GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.GA2.hg19.filtered.vcf"),
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
"WEx" -> new Target("NA12878.WEx", hg18, dbSNP_hg18_129, hapmap_hg18,
"NA12878_wex_b37" -> new Target("NA12878.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/seq/picard_aggregation/C339/NA12878/v3/NA12878.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
"NA12878_wex_hg18" -> new Target("NA12878.HiSeq.WEx.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.indels.10.mask",
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.WEx.cleaned.recal.bam"),
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.vcf"),
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
"WExTrio" -> new Target("CEUTrio.WEx", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
"NA12878_wex_decoy" -> new Target("NA12878.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.NA12878.clean.dedup.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
"CEUTrio_wex_b37" -> new Target("CEUTrio.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.bwa.cleaned.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"),
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
"WGSTrio" -> new Target("CEUTrio.WGS", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
"CEUTrio_wgs_b37" -> new Target("CEUTrio.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
"WExTrioDecoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
"CEUTrio_wex_decoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
"WGSTrioDecoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
"CEUTrio_wgs_decoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"),
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
"GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"),
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.GA2.hg19.filtered.vcf"),
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
"FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"),
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **

Binary file not shown.

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="24" status="integration" />
<info organisation="org.broad" module="tribble" revision="25" status="integration" />
</ivy-module>