Merge branch 'master' of ssh://chartl@tin.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
ef05827c7b
|
|
@ -163,6 +163,14 @@
|
||||||
<!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
|
<!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
|
||||||
<delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
|
<delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
|
||||||
<delete file="${ivy.jar.dir}/ivy-2.2.0-rc1.jar"/>
|
<delete file="${ivy.jar.dir}/ivy-2.2.0-rc1.jar"/>
|
||||||
|
<!--
|
||||||
|
An old versions of the ivy-1.4.1.xml does not contain /ivy-module/configuration/conf/@name="compile".
|
||||||
|
Easier to upgrade to 1.4.4 than try to deal with xmlproperty and conditional deletion in ant.
|
||||||
|
Just in case we remove explicit 1.4.4 and go back to 1.4.1, try to clean out the file for now.
|
||||||
|
-->
|
||||||
|
<delete file="${ivy.home}/cache/javax.mail/mail/ivy-1.4.1.xml"/>
|
||||||
|
<delete file="${ivy.home}/cache/javax.mail/mail/ivydata-1.4.1.properties"/>
|
||||||
|
<delete file="${ivy.home}/cache/javax.mail/mail/jars/mail-1.4.1.jar"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="init.buildall">
|
<target name="init.buildall">
|
||||||
|
|
|
||||||
6
ivy.xml
6
ivy.xml
|
|
@ -15,10 +15,8 @@
|
||||||
<!-- Tribble -->
|
<!-- Tribble -->
|
||||||
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
|
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
|
||||||
|
|
||||||
<dependency org="log4j" name="log4j" rev="1.2.15">
|
<dependency org="log4j" name="log4j" rev="1.2.15"/>
|
||||||
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
|
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
|
||||||
<exclude org="javax.mail" />
|
|
||||||
</dependency>
|
|
||||||
<dependency org="colt" name="colt" rev="1.2.0"/>
|
<dependency org="colt" name="colt" rev="1.2.0"/>
|
||||||
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
||||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
||||||
|
|
|
||||||
|
|
@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
||||||
private String RECAL_FILE = "output.recal_data.csv";
|
private String RECAL_FILE = "output.recal_data.csv";
|
||||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||||
private String OUTPUT_DIR = "analyzeCovariates/";
|
private String OUTPUT_DIR = "analyzeCovariates/";
|
||||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
|
||||||
private String PATH_TO_RSCRIPT = "Rscript";
|
private String PATH_TO_RSCRIPT = "Rscript";
|
||||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||||
private String PATH_TO_RESOURCES = "public/R/";
|
private String PATH_TO_RESOURCES = "public/R/";
|
||||||
|
|
|
||||||
|
|
@ -929,6 +929,14 @@ public class GenomeAnalysisEngine {
|
||||||
return readsDataSource.getHeader(reader);
|
return readsDataSource.getHeader(reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the master sequence dictionary for this GATK engine instance
|
||||||
|
* @return a never-null dictionary listing all of the contigs known to this engine instance
|
||||||
|
*/
|
||||||
|
public SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||||
|
return getReferenceDataSource().getReference().getSequenceDictionary();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns data source object encapsulating all essential info and handlers used to traverse
|
* Returns data source object encapsulating all essential info and handlers used to traverse
|
||||||
* reads; header merger, individual file readers etc can be accessed through the returned data source object.
|
* reads; header merger, individual file readers etc can be accessed through the returned data source object.
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||||
else if ( stub.getOutputStream() != null ) {
|
else if ( stub.getOutputStream() != null ) {
|
||||||
this.file = null;
|
this.file = null;
|
||||||
this.stream = stub.getOutputStream();
|
this.stream = stub.getOutputStream();
|
||||||
writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes());
|
writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||||
|
|
@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
|
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
|
||||||
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
|
return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.io.stubs;
|
package org.broadinstitute.sting.gatk.io.stubs;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
|
@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
||||||
return isCompressed;
|
return isCompressed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the master sequence dictionary from the engine associated with this stub
|
||||||
|
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||||
|
return engine.getMasterSequenceDictionary();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should we tell the VCF writer not to write genotypes?
|
* Should we tell the VCF writer not to write genotypes?
|
||||||
* @return true if the writer should not write genotypes.
|
* @return true if the writer should not write genotypes.
|
||||||
|
|
|
||||||
|
|
@ -101,7 +101,7 @@ public class RMDIndexer extends CommandLineProgram {
|
||||||
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
|
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
|
||||||
|
|
||||||
// add writing of the sequence dictionary, if supplied
|
// add writing of the sequence dictionary, if supplied
|
||||||
builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);
|
builder.validateAndUpdateIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary());
|
||||||
|
|
||||||
// create the output stream, and write the index
|
// create the output stream, and write the index
|
||||||
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broad.tribble.index.Index;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
|
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||||
|
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utilities for working with Sequence Dictionaries embedded in tribble indices
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class IndexDictionaryUtils {
|
||||||
|
private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class);
|
||||||
|
|
||||||
|
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
||||||
|
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
|
||||||
|
* @param index the index file to use
|
||||||
|
* @return a SAMSequenceDictionary if available, null if unavailable
|
||||||
|
*/
|
||||||
|
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
|
||||||
|
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
||||||
|
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
|
||||||
|
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
|
||||||
|
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
|
||||||
|
Integer.valueOf(entry.getValue())));
|
||||||
|
}
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create the sequence dictionary with the contig list; a backup approach
|
||||||
|
* @param index the index file to use
|
||||||
|
* @param dict the sequence dictionary to add contigs to
|
||||||
|
* @return the filled-in sequence dictionary
|
||||||
|
*/
|
||||||
|
static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
|
||||||
|
LinkedHashSet<String> seqNames = index.getSequenceNames();
|
||||||
|
if (seqNames == null) {
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
for (String name : seqNames) {
|
||||||
|
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
|
||||||
|
dict.addSequence(seq);
|
||||||
|
}
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
|
||||||
|
for ( SAMSequenceRecord seq : dict.getSequences() ) {
|
||||||
|
final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
|
||||||
|
final String length = String.valueOf(seq.getSequenceLength());
|
||||||
|
index.addProperty(contig,length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void validateTrackSequenceDictionary(final String trackName,
|
||||||
|
final SAMSequenceDictionary trackDict,
|
||||||
|
final SAMSequenceDictionary referenceDict,
|
||||||
|
final ValidationExclusion.TYPE validationExclusionType ) {
|
||||||
|
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
||||||
|
if (trackDict == null || trackDict.size() == 0)
|
||||||
|
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
||||||
|
else {
|
||||||
|
Set<String> trackSequences = new TreeSet<String>();
|
||||||
|
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
||||||
|
trackSequences.add(dictionaryEntry.getSequenceName());
|
||||||
|
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -25,7 +25,6 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.FeatureCodec;
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.FeatureSource;
|
import org.broad.tribble.FeatureSource;
|
||||||
|
|
@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -52,11 +50,6 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.LinkedHashSet;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.TreeSet;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||||
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
||||||
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
|
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
|
||||||
|
|
||||||
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
|
||||||
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
|
||||||
|
|
||||||
// private sequence dictionary we use to set our tracks with
|
// private sequence dictionary we use to set our tracks with
|
||||||
private SAMSequenceDictionary dict = null;
|
private SAMSequenceDictionary dict = null;
|
||||||
|
|
||||||
|
|
@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||||
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
||||||
catch (ReviewedStingException e) { }
|
catch (ReviewedStingException e) { }
|
||||||
|
|
||||||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
|
||||||
|
|
||||||
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
|
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
|
||||||
if (sequenceDictionary.size() == 0 && dict != null) {
|
if (sequenceDictionary.size() == 0 && dict != null) {
|
||||||
File indexFile = Tribble.indexFile(inputFile);
|
File indexFile = Tribble.indexFile(inputFile);
|
||||||
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
|
validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);
|
||||||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
try { // re-write the index
|
||||||
|
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
|
||||||
|
}
|
||||||
|
|
||||||
|
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
|
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
|
||||||
|
|
@ -363,88 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||||
// this can take a while, let them know what we're doing
|
// this can take a while, let them know what we're doing
|
||||||
logger.info("Creating Tribble index in memory for file " + inputFile);
|
logger.info("Creating Tribble index in memory for file " + inputFile);
|
||||||
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||||
setIndexSequenceDictionary(inputFile, idx, dict, null, false);
|
validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
|
||||||
// static functions to work with the sequence dictionaries of indexes
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
|
|
||||||
* @param index the index file to use
|
|
||||||
* @return a SAMSequenceDictionary if available, null if unavailable
|
|
||||||
*/
|
|
||||||
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
|
|
||||||
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
|
||||||
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
|
|
||||||
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
|
|
||||||
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
|
|
||||||
Integer.valueOf(entry.getValue())));
|
|
||||||
}
|
|
||||||
return dict;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create the sequence dictionary with the contig list; a backup approach
|
|
||||||
* @param index the index file to use
|
|
||||||
* @param dict the sequence dictionary to add contigs to
|
|
||||||
* @return the filled-in sequence dictionary
|
|
||||||
*/
|
|
||||||
private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
|
|
||||||
LinkedHashSet<String> seqNames = index.getSequenceNames();
|
|
||||||
if (seqNames == null) {
|
|
||||||
return dict;
|
|
||||||
}
|
|
||||||
for (String name : seqNames) {
|
|
||||||
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
|
|
||||||
dict.addSequence(seq);
|
|
||||||
}
|
|
||||||
return dict;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
|
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
|
||||||
* (that each contig in the index is in the sequence dictionary).
|
* (that each contig in the index is in the sequence dictionary).
|
||||||
* @param inputFile for proper error message formatting.
|
* @param inputFile for proper error message formatting.
|
||||||
* @param dict the sequence dictionary
|
* @param dict the sequence dictionary
|
||||||
* @param index the index file
|
* @param index the index file
|
||||||
* @param indexFile the index file
|
|
||||||
* @param rewriteIndex should we rewrite the index when we're done?
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) {
|
public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) {
|
||||||
if (dict == null) return;
|
if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null");
|
||||||
|
|
||||||
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
|
||||||
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
|
|
||||||
|
|
||||||
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
||||||
for (SAMSequenceRecord seq : currentDict.getSequences()) {
|
final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
||||||
if (dict.getSequence(seq.getSequenceName()) == null)
|
validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict);
|
||||||
continue;
|
|
||||||
index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength()));
|
// actually update the dictionary in the index
|
||||||
}
|
IndexDictionaryUtils.setIndexSequenceDictionary(index, dict);
|
||||||
// re-write the index
|
|
||||||
if (rewriteIndex) try {
|
|
||||||
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
|
|
||||||
} catch (IOException e) {
|
|
||||||
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void validateTrackSequenceDictionary(final String trackName,
|
||||||
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
|
final SAMSequenceDictionary trackDict,
|
||||||
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
final SAMSequenceDictionary referenceDict ) {
|
||||||
if (trackDict == null || trackDict.size() == 0)
|
IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType);
|
||||||
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
|
||||||
else {
|
|
||||||
Set<String> trackSequences = new TreeSet<String>();
|
|
||||||
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
|
||||||
trackSequences.add(dictionaryEntry.getSequenceName());
|
|
||||||
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -358,7 +358,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
||||||
public void printOnTraversalDone() {
|
public void printOnTraversalDone() {
|
||||||
printProgress(null, null, true);
|
printProgress(null, null, true);
|
||||||
|
|
||||||
final double elapsed = timer.getElapsedTime();
|
final double elapsed = timer == null ? 0 : timer.getElapsedTime();
|
||||||
|
|
||||||
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics();
|
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
|
@ -77,6 +78,15 @@ public abstract class Walker<MapType, ReduceType> {
|
||||||
return toolkit;
|
return toolkit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the master sequence dictionary for this walker
|
||||||
|
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||||
|
return getToolkit().getMasterSequenceDictionary();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* (conceptual static) method that states whether you want to see reads piling up at a locus
|
* (conceptual static) method that states whether you want to see reads piling up at a locus
|
||||||
* that contain a deletion at the locus.
|
* that contain a deletion at the locus.
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,7 @@ public class SBByDepth extends AnnotationByDepth {
|
||||||
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
|
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY));
|
double sBias = vc.getAttributeAsDouble(VCFConstants.STRAND_BIAS_KEY, -1);
|
||||||
|
|
||||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||||
if ( genotypes == null || genotypes.size() == 0 )
|
if ( genotypes == null || genotypes.size() == 0 )
|
||||||
|
|
|
||||||
|
|
@ -58,6 +58,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
// lacking a SnpEff version number in the VCF header:
|
// lacking a SnpEff version number in the VCF header:
|
||||||
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
|
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
|
||||||
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
|
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
|
||||||
|
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
|
||||||
|
|
||||||
|
// When we write the SnpEff version number and command line to the output VCF, we change
|
||||||
|
// the key name slightly so that the output VCF won't be confused in the future for an
|
||||||
|
// output file produced by SnpEff directly:
|
||||||
|
public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY;
|
||||||
|
public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY;
|
||||||
|
|
||||||
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
|
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
|
||||||
// field annotation with the key EFF:
|
// field annotation with the key EFF:
|
||||||
|
|
@ -165,10 +172,26 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
UNKNOWN
|
UNKNOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
|
||||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
|
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
|
||||||
|
// without providing a SnpEff rod via --snpEffFile):
|
||||||
validateRodBinding(walker.getSnpEffRodBinding());
|
validateRodBinding(walker.getSnpEffRodBinding());
|
||||||
checkSnpEffVersion(walker, toolkit);
|
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||||
|
|
||||||
|
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
|
||||||
|
// the SnpEff rod, and that the file was generated by a supported version of SnpEff:
|
||||||
|
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
|
||||||
|
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
|
||||||
|
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
|
||||||
|
|
||||||
|
checkSnpEffVersion(snpEffVersionLine);
|
||||||
|
checkSnpEffCommandLine(snpEffCommandLine);
|
||||||
|
|
||||||
|
// If everything looks ok, add the SnpEff version number and command-line header lines to the
|
||||||
|
// header of the VCF output file, changing the key names so that our output file won't be
|
||||||
|
// mistaken in the future for a SnpEff output file:
|
||||||
|
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
|
||||||
|
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
||||||
|
|
@ -204,12 +227,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
|
private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) {
|
||||||
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
|
||||||
|
|
||||||
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
|
|
||||||
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
|
|
||||||
|
|
||||||
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
|
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
|
||||||
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
|
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
|
||||||
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
|
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
|
||||||
|
|
@ -224,6 +242,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) {
|
||||||
|
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
|
||||||
|
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " +
|
||||||
|
"input file, which should be added by all supported versions of SnpEff (" +
|
||||||
|
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private boolean isSupportedSnpEffVersion ( String versionString ) {
|
private boolean isSupportedSnpEffVersion ( String versionString ) {
|
||||||
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
|
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
|
||||||
if ( supportedVersion.equals(versionString) ) {
|
if ( supportedVersion.equals(versionString) ) {
|
||||||
|
|
@ -248,10 +274,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
||||||
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
|
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
|
||||||
|
|
||||||
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
|
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
|
||||||
List<String> individualEffects;
|
if ( effectFieldValue == null ) {
|
||||||
|
return parsedEffects;
|
||||||
|
}
|
||||||
|
|
||||||
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
|
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
|
||||||
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
|
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
|
||||||
|
List<String> individualEffects;
|
||||||
if ( effectFieldValue instanceof List ) {
|
if ( effectFieldValue instanceof List ) {
|
||||||
individualEffects = (List<String>)effectFieldValue;
|
individualEffects = (List<String>)effectFieldValue;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -214,8 +214,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
|
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
|
||||||
engine.initializeExpressions(expressionsToUse);
|
engine.initializeExpressions(expressionsToUse);
|
||||||
|
|
||||||
engine.invokeAnnotationInitializationMethods();
|
|
||||||
|
|
||||||
// setup the header fields
|
// setup the header fields
|
||||||
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
||||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||||
|
|
@ -225,6 +223,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
hInfo.add(line);
|
hInfo.add(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
engine.invokeAnnotationInitializationMethods(hInfo);
|
||||||
|
|
||||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||||
vcfWriter.writeHeader(vcfHeader);
|
vcfWriter.writeHeader(vcfHeader);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -114,13 +114,13 @@ public class VariantAnnotatorEngine {
|
||||||
dbAnnotations.put(rod, rod.getName());
|
dbAnnotations.put(rod, rod.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void invokeAnnotationInitializationMethods() {
|
public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
|
||||||
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
|
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
|
||||||
annotation.initialize(walker, toolkit);
|
annotation.initialize(walker, toolkit, headerLines);
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
|
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||||
annotation.initialize(walker, toolkit);
|
annotation.initialize(walker, toolkit, headerLines);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,9 +25,11 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
|
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
|
||||||
public abstract class VariantAnnotatorAnnotation {
|
public abstract class VariantAnnotatorAnnotation {
|
||||||
|
|
@ -35,5 +37,5 @@ public abstract class VariantAnnotatorAnnotation {
|
||||||
public abstract List<String> getKeyNames();
|
public abstract List<String> getKeyNames();
|
||||||
|
|
||||||
// initialization method (optional for subclasses, and therefore non-abstract)
|
// initialization method (optional for subclasses, and therefore non-abstract)
|
||||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { }
|
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) { }
|
||||||
}
|
}
|
||||||
|
|
@ -63,9 +63,12 @@ import java.util.*;
|
||||||
* <h2>Input</h2>
|
* <h2>Input</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* One or more bam files (with proper headers) to be analyzed for coverage statistics
|
* One or more bam files (with proper headers) to be analyzed for coverage statistics
|
||||||
* (Optional) A REFSEQ Rod to aggregate coverage to the gene level
|
|
||||||
* </p>
|
* </p>
|
||||||
*
|
* <p>
|
||||||
|
*(Optional) A REFSEQ Rod to aggregate coverage to the gene level
|
||||||
|
* <p>
|
||||||
|
* (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation)
|
||||||
|
*</p></p>
|
||||||
* <h2>Output</h2>
|
* <h2>Output</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
|
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
|
||||||
|
|
@ -93,7 +96,7 @@ import java.util.*;
|
||||||
* <pre>
|
* <pre>
|
||||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
* -R ref.fasta \
|
* -R ref.fasta \
|
||||||
* -T VariantEval \
|
* -T DepthOfCoverage \
|
||||||
* -o file_name_base \
|
* -o file_name_base \
|
||||||
* -I input_bams.list
|
* -I input_bams.list
|
||||||
* [-geneList refSeq.sorted.txt] \
|
* [-geneList refSeq.sorted.txt] \
|
||||||
|
|
|
||||||
|
|
@ -276,8 +276,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
if ( elt.isReducedRead() ) {
|
if ( elt.isReducedRead() ) {
|
||||||
// reduced read representation
|
// reduced read representation
|
||||||
byte qual = elt.getReducedQual();
|
byte qual = elt.getReducedQual();
|
||||||
|
if ( BaseUtils.isRegularBase( elt.getBase() )) {
|
||||||
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
|
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
|
||||||
return elt.getReducedCount(); // we added nObs bases here
|
return elt.getReducedCount(); // we added nObs bases here
|
||||||
|
} else // odd bases or deletions => don't use them
|
||||||
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
|
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
|
||||||
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
|
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,7 @@ public class HaplotypeIndelErrorModel {
|
||||||
baseMatchArray = new double[MAX_CACHED_QUAL+1];
|
baseMatchArray = new double[MAX_CACHED_QUAL+1];
|
||||||
baseMismatchArray = new double[MAX_CACHED_QUAL+1];
|
baseMismatchArray = new double[MAX_CACHED_QUAL+1];
|
||||||
for (int k=1; k <= MAX_CACHED_QUAL; k++) {
|
for (int k=1; k <= MAX_CACHED_QUAL; k++) {
|
||||||
double baseProb = QualityUtils.qualToProb(k);
|
double baseProb = QualityUtils.qualToProb((byte)k);
|
||||||
|
|
||||||
|
|
||||||
baseMatchArray[k] = probToQual(baseProb);
|
baseMatchArray[k] = probToQual(baseProb);
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ public class PhasingRead extends BaseArray {
|
||||||
public PhasingRead(int length, int mappingQual) {
|
public PhasingRead(int length, int mappingQual) {
|
||||||
super(length);
|
super(length);
|
||||||
|
|
||||||
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb(mappingQual));
|
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb((byte)mappingQual));
|
||||||
|
|
||||||
this.baseProbs = new PreciseNonNegativeDouble[length];
|
this.baseProbs = new PreciseNonNegativeDouble[length];
|
||||||
Arrays.fill(this.baseProbs, null);
|
Arrays.fill(this.baseProbs, null);
|
||||||
|
|
|
||||||
|
|
@ -44,12 +44,12 @@ public class RefSeqDataParser {
|
||||||
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
|
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
|
||||||
|
|
||||||
Map<String, String> entriesToNames = new HashMap<String, String>();
|
Map<String, String> entriesToNames = new HashMap<String, String>();
|
||||||
Integer numRecords = vc.getAttributeAsIntegerNoException(NUM_RECORDS_KEY);
|
int numRecords = vc.getAttributeAsInt(NUM_RECORDS_KEY, -1);
|
||||||
if (numRecords != null) {
|
if (numRecords != -1) {
|
||||||
boolean done = false;
|
boolean done = false;
|
||||||
|
|
||||||
if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
|
if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
|
||||||
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
|
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
||||||
if (name != null) {
|
if (name != null) {
|
||||||
entriesToNames.put(nameKeyToUse, name);
|
entriesToNames.put(nameKeyToUse, name);
|
||||||
done = true;
|
done = true;
|
||||||
|
|
@ -59,14 +59,14 @@ public class RefSeqDataParser {
|
||||||
if (!done) {
|
if (!done) {
|
||||||
for (int i = 1; i <= numRecords; i++) {
|
for (int i = 1; i <= numRecords; i++) {
|
||||||
String key = nameKeyToUseMultiplePrefix + i;
|
String key = nameKeyToUseMultiplePrefix + i;
|
||||||
String name = vc.getAttributeAsStringNoException(key);
|
String name = vc.getAttributeAsString(key, null);
|
||||||
if (name != null)
|
if (name != null)
|
||||||
entriesToNames.put(key, name);
|
entriesToNames.put(key, name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else { // no entry with the # of records:
|
else { // no entry with the # of records:
|
||||||
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
|
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
||||||
if (name != null) {
|
if (name != null) {
|
||||||
entriesToNames.put(nameKeyToUse, name);
|
entriesToNames.put(nameKeyToUse, name);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -110,12 +110,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
||||||
case SNP:
|
case SNP:
|
||||||
nVariantLoci++;
|
nVariantLoci++;
|
||||||
nSNPs++;
|
nSNPs++;
|
||||||
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
|
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
|
||||||
break;
|
break;
|
||||||
case MNP:
|
case MNP:
|
||||||
nVariantLoci++;
|
nVariantLoci++;
|
||||||
nMNPs++;
|
nMNPs++;
|
||||||
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
|
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
|
||||||
break;
|
break;
|
||||||
case INDEL:
|
case INDEL:
|
||||||
nVariantLoci++;
|
nVariantLoci++;
|
||||||
|
|
@ -137,7 +137,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
||||||
|
|
||||||
String refStr = vc1.getReference().getBaseString().toUpperCase();
|
String refStr = vc1.getReference().getBaseString().toUpperCase();
|
||||||
|
|
||||||
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null;
|
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null;
|
||||||
// if (aaStr.equals(".")) {
|
// if (aaStr.equals(".")) {
|
||||||
// aaStr = refStr;
|
// aaStr = refStr;
|
||||||
// }
|
// }
|
||||||
|
|
|
||||||
|
|
@ -219,7 +219,8 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Double getPQ(Genotype gt) {
|
public static Double getPQ(Genotype gt) {
|
||||||
return gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
|
Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
||||||
|
return d == -1 ? null : d;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {
|
public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
|
||||||
if ( eval.hasGenotypes() )
|
if ( eval.hasGenotypes() )
|
||||||
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
||||||
else if ( eval.hasAttribute("AC") ) {
|
else if ( eval.hasAttribute("AC") ) {
|
||||||
ac = Integer.valueOf(eval.getAttributeAsString("AC"));
|
ac = eval.getAttributeAsInt("AC", -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ac != -1 ) {
|
if ( ac != -1 ) {
|
||||||
|
|
|
||||||
|
|
@ -49,18 +49,14 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
||||||
else nTv++;
|
else nTv++;
|
||||||
}
|
}
|
||||||
|
|
||||||
String refStr = vc.getReference().getBaseString().toUpperCase();
|
if (vc.hasAttribute("ANCESTRALALLELE")) {
|
||||||
String aaStr = vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase();
|
final String aaStr = vc.getAttributeAsString("ANCESTRALALLELE", "null").toUpperCase();
|
||||||
|
if ( ! aaStr.equals(".") ) {
|
||||||
if (aaStr != null && !aaStr.equalsIgnoreCase("null") && !aaStr.equals(".")) {
|
switch ( BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0] ) ) {
|
||||||
BaseUtils.BaseSubstitutionType aaSubType = BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0]);
|
case TRANSITION: nTiDerived++; break;
|
||||||
|
case TRANSVERSION: nTvDerived++; break;
|
||||||
//System.out.println(refStr + " " + vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() + " " + aaSubType);
|
default: break;
|
||||||
|
}
|
||||||
if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSITION) {
|
|
||||||
nTiDerived++;
|
|
||||||
} else if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSVERSION) {
|
|
||||||
nTvDerived++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -131,7 +131,7 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
||||||
//// System.out.printf(" ac = %d%n", ac);
|
//// System.out.printf(" ac = %d%n", ac);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
|
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
|
||||||
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
||||||
} else {
|
} else {
|
||||||
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ public class AlleleCount extends VariantStratifier {
|
||||||
if (eval != null) {
|
if (eval != null) {
|
||||||
int AC = -1;
|
int AC = -1;
|
||||||
if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
|
if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
|
||||||
AC = eval.getAttributeAsInt("AC");
|
AC = eval.getAttributeAsInt("AC", 0);
|
||||||
} else if ( eval.isVariant() ) {
|
} else if ( eval.isVariant() ) {
|
||||||
for (Allele allele : eval.getAlternateAlleles())
|
for (Allele allele : eval.getAlternateAlleles())
|
||||||
AC = Math.max(AC, eval.getChromosomeCount(allele));
|
AC = Math.max(AC, eval.getChromosomeCount(allele));
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ public class AlleleFrequency extends VariantStratifier {
|
||||||
|
|
||||||
if (eval != null) {
|
if (eval != null) {
|
||||||
try {
|
try {
|
||||||
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF") / 5.0, 3))));
|
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF", 0.0) / 5.0, 3))));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return relevantStates;
|
return relevantStates;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -90,8 +90,8 @@ public class Degeneracy extends VariantStratifier {
|
||||||
Integer frame = null;
|
Integer frame = null;
|
||||||
|
|
||||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||||
aa = eval.getAttributeAsString("refseq.variantAA");
|
aa = eval.getAttributeAsString("refseq.variantAA", null);
|
||||||
frame = eval.getAttributeAsInt("refseq.frame");
|
frame = eval.getAttributeAsInt("refseq.frame", 0);
|
||||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||||
int annotationId = 1;
|
int annotationId = 1;
|
||||||
String key;
|
String key;
|
||||||
|
|
@ -99,7 +99,7 @@ public class Degeneracy extends VariantStratifier {
|
||||||
do {
|
do {
|
||||||
key = String.format("refseq.functionalClass_%d", annotationId);
|
key = String.format("refseq.functionalClass_%d", annotationId);
|
||||||
|
|
||||||
String newtype = eval.getAttributeAsString(key);
|
String newtype = eval.getAttributeAsString(key, null);
|
||||||
|
|
||||||
if ( newtype != null &&
|
if ( newtype != null &&
|
||||||
( type == null ||
|
( type == null ||
|
||||||
|
|
@ -109,13 +109,13 @@ public class Degeneracy extends VariantStratifier {
|
||||||
type = newtype;
|
type = newtype;
|
||||||
|
|
||||||
String aakey = String.format("refseq.variantAA_%d", annotationId);
|
String aakey = String.format("refseq.variantAA_%d", annotationId);
|
||||||
aa = eval.getAttributeAsString(aakey);
|
aa = eval.getAttributeAsString(aakey, null);
|
||||||
|
|
||||||
if (aa != null) {
|
if (aa != null) {
|
||||||
String framekey = String.format("refseq.frame_%d", annotationId);
|
String framekey = String.format("refseq.frame_%d", annotationId);
|
||||||
|
|
||||||
if (eval.hasAttribute(framekey)) {
|
if (eval.hasAttribute(framekey)) {
|
||||||
frame = eval.getAttributeAsInt(framekey);
|
frame = eval.getAttributeAsInt(framekey, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@ public class FunctionalClass extends VariantStratifier {
|
||||||
|
|
||||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||||
try {
|
try {
|
||||||
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass"));
|
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass", null));
|
||||||
} catch ( Exception e ) {} // don't error out if the type isn't supported
|
} catch ( Exception e ) {} // don't error out if the type isn't supported
|
||||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||||
int annotationId = 1;
|
int annotationId = 1;
|
||||||
|
|
@ -47,7 +47,7 @@ public class FunctionalClass extends VariantStratifier {
|
||||||
do {
|
do {
|
||||||
key = String.format("refseq.functionalClass_%d", annotationId);
|
key = String.format("refseq.functionalClass_%d", annotationId);
|
||||||
|
|
||||||
String newtypeStr = eval.getAttributeAsString(key);
|
String newtypeStr = eval.getAttributeAsString(key, null);
|
||||||
if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
|
if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
|
||||||
try {
|
try {
|
||||||
FunctionalType newType = FunctionalType.valueOf(newtypeStr);
|
FunctionalType newType = FunctionalType.valueOf(newtypeStr);
|
||||||
|
|
|
||||||
|
|
@ -115,7 +115,7 @@ public class VQSRCalibrationCurve {
|
||||||
if ( vc.isFiltered() )
|
if ( vc.isFiltered() )
|
||||||
return 0.0;
|
return 0.0;
|
||||||
else if ( vc.hasAttribute(VQSRQualKey) ) {
|
else if ( vc.hasAttribute(VQSRQualKey) ) {
|
||||||
double qual = vc.getAttributeAsDouble(VQSRQualKey);
|
double qual = vc.getAttributeAsDouble(VQSRQualKey, 0.0);
|
||||||
return probTrueVariant(qual);
|
return probTrueVariant(qual);
|
||||||
} else {
|
} else {
|
||||||
throw new UserException.VariantContextMissingRequiredField(VQSRQualKey, vc);
|
throw new UserException.VariantContextMissingRequiredField(VQSRQualKey, vc);
|
||||||
|
|
@ -143,7 +143,7 @@ public class VQSRCalibrationCurve {
|
||||||
for ( int i = 0; i < log10Likelihoods.length; i++) {
|
for ( int i = 0; i < log10Likelihoods.length; i++) {
|
||||||
double p = Math.pow(10, log10Likelihoods[i]);
|
double p = Math.pow(10, log10Likelihoods[i]);
|
||||||
double q = alpha * p + (1-alpha) * noInfoPr;
|
double q = alpha * p + (1-alpha) * noInfoPr;
|
||||||
if ( DEBUG ) System.out.printf(" vqslod = %.2f, p = %.2e, alpha = %.2e, q = %.2e%n", vc.getAttributeAsDouble(VQSRQualKey), p, alpha, q);
|
if ( DEBUG ) System.out.printf(" vqslod = %.2f, p = %.2e, alpha = %.2e, q = %.2e%n", vc.getAttributeAsDouble(VQSRQualKey, 0.0), p, alpha, q);
|
||||||
updated[i] = Math.log10(q);
|
updated[i] = Math.log10(q);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -155,7 +155,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
private double[] TS_TRANCHES = new double[] {100.0, 99.9, 99.0, 90.0};
|
private double[] TS_TRANCHES = new double[] {100.0, 99.9, 99.0, 90.0};
|
||||||
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
|
@Argument(fullName="ignore_filter", shortName="ignoreFilter", doc="If specified the variant recalibrator will use variants even if the specified filter name is marked in the input VCF file", required=false)
|
||||||
private String[] IGNORE_INPUT_FILTERS = null;
|
private String[] IGNORE_INPUT_FILTERS = null;
|
||||||
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required=false)
|
@Argument(fullName="path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required=false)
|
||||||
private String PATH_TO_RSCRIPT = "Rscript";
|
private String PATH_TO_RSCRIPT = "Rscript";
|
||||||
@Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
|
@Argument(fullName="rscript_file", shortName="rscriptFile", doc="The output rscript file generated by the VQSR to aid in visualization of the input data and learned model", required=false)
|
||||||
private String RSCRIPT_FILE = null;
|
private String RSCRIPT_FILE = null;
|
||||||
|
|
|
||||||
|
|
@ -99,7 +99,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
|
||||||
final VCFHeader vcfHeader = new VCFHeader(metaData, samples);
|
final VCFHeader vcfHeader = new VCFHeader(metaData, samples);
|
||||||
writer = new StandardVCFWriter(file, false);
|
writer = new StandardVCFWriter(file, getMasterSequenceDictionary(), false);
|
||||||
writer.writeHeader(vcfHeader);
|
writer.writeHeader(vcfHeader);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -75,7 +75,7 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
||||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
|
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames));
|
||||||
|
|
||||||
vcfWriter1.writeHeader(new VCFHeader(hInfo, samples));
|
vcfWriter1.writeHeader(new VCFHeader(hInfo, samples));
|
||||||
vcfWriter2 = new StandardVCFWriter(file2, true);
|
vcfWriter2 = new StandardVCFWriter(file2, getMasterSequenceDictionary(), true);
|
||||||
vcfWriter2.writeHeader(new VCFHeader(hInfo, samples));
|
vcfWriter2.writeHeader(new VCFHeader(hInfo, samples));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -574,7 +574,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
|
||||||
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
|
// ok we have a comp VC and we need to match the AF spectrum of inputAFRodName.
|
||||||
// We then pick a variant with probablity AF*desiredFraction
|
// We then pick a variant with probablity AF*desiredFraction
|
||||||
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
|
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
|
||||||
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY);
|
String afo = sub.getAttributeAsString(VCFConstants.ALLELE_FREQUENCY_KEY, null);
|
||||||
|
|
||||||
double af;
|
double af;
|
||||||
double afBoost = 1.0;
|
double afBoost = 1.0;
|
||||||
|
|
|
||||||
|
|
@ -192,7 +192,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
|
||||||
if ( getters.containsKey(field) ) {
|
if ( getters.containsKey(field) ) {
|
||||||
val = getters.get(field).get(vc);
|
val = getters.get(field).get(vc);
|
||||||
} else if ( vc.hasAttribute(field) ) {
|
} else if ( vc.hasAttribute(field) ) {
|
||||||
val = vc.getAttributeAsString(field);
|
val = vc.getAttributeAsString(field, null);
|
||||||
} else if ( isWildCard(field) ) {
|
} else if ( isWildCard(field) ) {
|
||||||
Set<String> wildVals = new HashSet<String>();
|
Set<String> wildVals = new HashSet<String>();
|
||||||
for ( Map.Entry<String,Object> elt : vc.getAttributes().entrySet()) {
|
for ( Map.Entry<String,Object> elt : vc.getAttributes().entrySet()) {
|
||||||
|
|
|
||||||
|
|
@ -9,14 +9,17 @@ import net.sf.samtools.SAMUtils;
|
||||||
* @author Kiran Garimella
|
* @author Kiran Garimella
|
||||||
*/
|
*/
|
||||||
public class QualityUtils {
|
public class QualityUtils {
|
||||||
|
|
||||||
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
public final static byte MAX_QUAL_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||||
public final static double MIN_REASONABLE_ERROR = 0.0001;
|
public final static double MIN_REASONABLE_ERROR = 0.0001;
|
||||||
public final static byte MAX_REASONABLE_Q_SCORE = 40;
|
public final static byte MAX_REASONABLE_Q_SCORE = 40;
|
||||||
public final static byte MIN_USABLE_Q_SCORE = 6;
|
public final static byte MIN_USABLE_Q_SCORE = 6;
|
||||||
|
|
||||||
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
|
public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
|
||||||
|
|
||||||
|
private static double qualToErrorProbCache[] = new double[256];
|
||||||
|
static {
|
||||||
|
for (int i = 0; i < 256; i++) qualToErrorProbCache[i] = qualToErrorProbRaw((byte)i);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Private constructor. No instantiating this class!
|
* Private constructor. No instantiating this class!
|
||||||
*/
|
*/
|
||||||
|
|
@ -33,10 +36,6 @@ public class QualityUtils {
|
||||||
return 1.0 - qualToErrorProb(qual);
|
return 1.0 - qualToErrorProb(qual);
|
||||||
}
|
}
|
||||||
|
|
||||||
static public double qualToProb(int qual) {
|
|
||||||
return qualToProb( (double)qual );
|
|
||||||
}
|
|
||||||
|
|
||||||
static public double qualToProb(double qual) {
|
static public double qualToProb(double qual) {
|
||||||
return 1.0 - Math.pow(10.0, qual/(-10.0));
|
return 1.0 - Math.pow(10.0, qual/(-10.0));
|
||||||
}
|
}
|
||||||
|
|
@ -48,10 +47,14 @@ public class QualityUtils {
|
||||||
* @param qual a quality score (0-40)
|
* @param qual a quality score (0-40)
|
||||||
* @return a probability (0.0-1.0)
|
* @return a probability (0.0-1.0)
|
||||||
*/
|
*/
|
||||||
static public double qualToErrorProb(byte qual) {
|
static public double qualToErrorProbRaw(byte qual) {
|
||||||
return Math.pow(10.0, ((double) qual)/-10.0);
|
return Math.pow(10.0, ((double) qual)/-10.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static public double qualToErrorProb(byte qual) {
|
||||||
|
return qualToErrorProbCache[qual];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convert a probability to a quality score. Note, this is capped at Q40.
|
* Convert a probability to a quality score. Note, this is capped at Q40.
|
||||||
*
|
*
|
||||||
|
|
@ -110,88 +113,4 @@ public class QualityUtils {
|
||||||
//return (byte) Math.min(qual, maxQual);
|
//return (byte) Math.min(qual, maxQual);
|
||||||
return (byte) Math.max(Math.min(qual, maxQual), 1);
|
return (byte) Math.max(Math.min(qual, maxQual), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Compress a base and a probability into a single byte so that it can be output in a SAMRecord's SQ field.
|
|
||||||
* Note: the highest probability this function can encode is 64%, so this function should only never be used on the best base hypothesis.
|
|
||||||
* Another note: the probability encoded here gets rounded to the nearest 1%.
|
|
||||||
*
|
|
||||||
* @param baseIndex the base index
|
|
||||||
* @param prob the base probability
|
|
||||||
* @return a byte containing the index and the probability
|
|
||||||
*/
|
|
||||||
static public byte baseAndProbToCompressedQuality(int baseIndex, double prob) {
|
|
||||||
byte compressedQual = 0;
|
|
||||||
|
|
||||||
compressedQual = (byte) baseIndex;
|
|
||||||
|
|
||||||
byte cprob = (byte) (100.0*prob);
|
|
||||||
byte qualmask = (byte) 252;
|
|
||||||
compressedQual += ((cprob << 2) & qualmask);
|
|
||||||
|
|
||||||
return compressedQual;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* From a compressed base, extract the base index (0:A, 1:C, 2:G, 3:T)
|
|
||||||
*
|
|
||||||
* @param compressedQual the compressed quality score, as returned by baseAndProbToCompressedQuality
|
|
||||||
* @return base index
|
|
||||||
*/
|
|
||||||
static public int compressedQualityToBaseIndex(byte compressedQual) {
|
|
||||||
return (int) (compressedQual & 0x3);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* From a compressed base, extract the base probability
|
|
||||||
*
|
|
||||||
* @param compressedQual the compressed quality score, as returned by baseAndProbToCompressedQuality
|
|
||||||
* @return the probability
|
|
||||||
*/
|
|
||||||
static public double compressedQualityToProb(byte compressedQual) {
|
|
||||||
// Because java natives are signed, extra care must be taken to avoid
|
|
||||||
// shifting a 1 into the sign bit in the implicit promotion of 2 to an int.
|
|
||||||
int x2 = ((int) compressedQual) & 0xff;
|
|
||||||
x2 = (x2 >>> 2);
|
|
||||||
|
|
||||||
return ((double) x2)/100.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the complement of a compressed quality
|
|
||||||
*
|
|
||||||
* @param compressedQual the compressed quality score (as returned by baseAndProbToCompressedQuality)
|
|
||||||
* @return the complementary compressed quality
|
|
||||||
*/
|
|
||||||
static public byte complementCompressedQuality(byte compressedQual) {
|
|
||||||
int baseIndex = compressedQualityToBaseIndex(compressedQual);
|
|
||||||
double prob = compressedQualityToProb(compressedQual);
|
|
||||||
|
|
||||||
return baseAndProbToCompressedQuality(BaseUtils.complementIndex(baseIndex), prob);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the reverse complement of a byte array of compressed qualities
|
|
||||||
*
|
|
||||||
* @param compressedQuals a byte array of compressed quality scores
|
|
||||||
* @return the reverse complement of the byte array
|
|
||||||
*/
|
|
||||||
static public byte[] reverseComplementCompressedQualityArray(byte[] compressedQuals) {
|
|
||||||
byte[] rcCompressedQuals = new byte[compressedQuals.length];
|
|
||||||
|
|
||||||
for (int pos = 0; pos < compressedQuals.length; pos++) {
|
|
||||||
rcCompressedQuals[compressedQuals.length - pos - 1] = complementCompressedQuality(compressedQuals[pos]);
|
|
||||||
}
|
|
||||||
|
|
||||||
return rcCompressedQuals;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Return the reverse of a byte array of qualities (compressed or otherwise)
|
|
||||||
* @param quals the array of bytes to be reversed
|
|
||||||
* @return the reverse of the quality array
|
|
||||||
*/
|
|
||||||
static public byte[] reverseQualityArray( byte[] quals ) {
|
|
||||||
return Utils.reverse(quals); // no sense in duplicating functionality
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,7 @@ public class RScriptExecutor {
|
||||||
|
|
||||||
public static class RScriptArgumentCollection {
|
public static class RScriptArgumentCollection {
|
||||||
@Advanced
|
@Advanced
|
||||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
|
||||||
public String PATH_TO_RSCRIPT = "Rscript";
|
public String PATH_TO_RSCRIPT = "Rscript";
|
||||||
|
|
||||||
@Advanced
|
@Advanced
|
||||||
|
|
|
||||||
|
|
@ -240,23 +240,35 @@ public class Utils {
|
||||||
return ret.toString();
|
return ret.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
//public static String join(String separator, Collection<String> strings) {
|
/**
|
||||||
// return join( separator, strings.toArray(new String[0]) );
|
* Returns a string of the form elt1.toString() [sep elt2.toString() ... sep elt.toString()] for a collection of
|
||||||
//}
|
* elti objects (note there's no actual space between sep and the elti elements). Returns
|
||||||
|
* "" if collection is empty. If collection contains just elt, then returns elt.toString()
|
||||||
public static <T> String join(String separator, Collection<T> objects) {
|
*
|
||||||
if(objects.isEmpty()) {
|
* @param separator the string to use to separate objects
|
||||||
|
* @param objects a collection of objects. the element order is defined by the iterator over objects
|
||||||
|
* @param <T> the type of the objects
|
||||||
|
* @return a non-null string
|
||||||
|
*/
|
||||||
|
public static <T> String join(final String separator, final Collection<T> objects) {
|
||||||
|
if (objects.isEmpty()) { // fast path for empty collection
|
||||||
return "";
|
return "";
|
||||||
}
|
} else {
|
||||||
Iterator<T> iter = objects.iterator();
|
final Iterator<T> iter = objects.iterator();
|
||||||
final StringBuilder ret = new StringBuilder(iter.next().toString());
|
final T first = iter.next();
|
||||||
|
|
||||||
|
if ( ! iter.hasNext() ) // fast path for singleton collections
|
||||||
|
return first.toString();
|
||||||
|
else { // full path for 2+ collection that actually need a join
|
||||||
|
final StringBuilder ret = new StringBuilder(first.toString());
|
||||||
while(iter.hasNext()) {
|
while(iter.hasNext()) {
|
||||||
ret.append(separator);
|
ret.append(separator);
|
||||||
ret.append(iter.next().toString());
|
ret.append(iter.next().toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret.toString();
|
return ret.toString();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static String dupString(char c, int nCopies) {
|
public static String dupString(char c, int nCopies) {
|
||||||
char[] chars = new char[nCopies];
|
char[] chars = new char[nCopies];
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import net.sf.picard.reference.ReferenceSequence;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMUtils;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -131,19 +132,18 @@ public class BAQ {
|
||||||
private final static double EM = 0.33333333333;
|
private final static double EM = 0.33333333333;
|
||||||
private final static double EI = 0.25;
|
private final static double EI = 0.25;
|
||||||
|
|
||||||
private double[][][] EPSILONS = new double[256][256][64];
|
private double[][][] EPSILONS = new double[256][256][SAMUtils.MAX_PHRED_SCORE+1];
|
||||||
|
|
||||||
private void initializeCachedData() {
|
private void initializeCachedData() {
|
||||||
for ( int i = 0; i < 256; i++ )
|
for ( int i = 0; i < 256; i++ )
|
||||||
for ( int j = 0; j < 256; j++ )
|
for ( int j = 0; j < 256; j++ )
|
||||||
for ( int q = 0; q < 64; q++ ) {
|
for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
|
||||||
double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
|
|
||||||
EPSILONS[i][j][q] = 1.0;
|
EPSILONS[i][j][q] = 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( char b1 : "ACGTacgt".toCharArray() ) {
|
for ( char b1 : "ACGTacgt".toCharArray() ) {
|
||||||
for ( char b2 : "ACGTacgt".toCharArray() ) {
|
for ( char b2 : "ACGTacgt".toCharArray() ) {
|
||||||
for ( int q = 0; q < 64; q++ ) {
|
for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
|
||||||
double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
|
double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
|
||||||
double e = Character.toLowerCase(b1) == Character.toLowerCase(b2) ? 1 - qual : qual * EM;
|
double e = Character.toLowerCase(b1) == Character.toLowerCase(b2) ? 1 - qual : qual * EM;
|
||||||
EPSILONS[(byte)b1][(byte)b2][q] = e;
|
EPSILONS[(byte)b1][(byte)b2][q] = e;
|
||||||
|
|
@ -152,7 +152,7 @@ public class BAQ {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private double calcEpsilon( byte ref, byte read, byte qualB ) {
|
protected double calcEpsilon( byte ref, byte read, byte qualB ) {
|
||||||
return EPSILONS[ref][read][qualB];
|
return EPSILONS[ref][read][qualB];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -324,6 +324,8 @@ public class ClippingOp {
|
||||||
|
|
||||||
if (index <= stop && cigarElementIterator.hasNext())
|
if (index <= stop && cigarElementIterator.hasNext())
|
||||||
cigarElement = cigarElementIterator.next();
|
cigarElement = cigarElementIterator.next();
|
||||||
|
else
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// add the remaining cigar elements
|
// add the remaining cigar elements
|
||||||
|
|
@ -363,6 +365,8 @@ public class ClippingOp {
|
||||||
index += shift;
|
index += shift;
|
||||||
if (index < start && cigarElementIterator.hasNext())
|
if (index < start && cigarElementIterator.hasNext())
|
||||||
cigarElement = cigarElementIterator.next();
|
cigarElement = cigarElementIterator.next();
|
||||||
|
else
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if we are hard clipping indels
|
// check if we are hard clipping indels
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
package org.broadinstitute.sting.utils.clipreads;
|
package org.broadinstitute.sting.utils.clipreads;
|
||||||
|
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
@ -56,17 +58,34 @@ public class ReadClipper {
|
||||||
return hardClipByReferenceCoordinates(refStart, -1);
|
return hardClipByReferenceCoordinates(refStart, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private int numDeletions(SAMRecord read) {
|
||||||
|
int result = 0;
|
||||||
|
for (CigarElement e: read.getCigar().getCigarElements()) {
|
||||||
|
if ( e.getOperator() == CigarOperator.DELETION || e.getOperator() == CigarOperator.D )
|
||||||
|
result =+ e.getLength();
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
private SAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
|
||||||
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart);
|
||||||
int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
int stop = (refStop < 0) ? read.getReadLength() - 1: ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop);
|
||||||
|
|
||||||
if (start < 0 || stop > read.getReadLength() - 1)
|
if (start < 0 || stop > read.getReadLength() - 1 + numDeletions(read))
|
||||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||||
|
|
||||||
// TODO add requires statement/check in the Hardclip function
|
// TODO add check in the Hardclip function
|
||||||
if ( start > stop )
|
if ( start > stop )
|
||||||
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
stop = ReadUtils.getReadCoordinateForReferenceCoordinate(read, ReadUtils.getRefCoordSoftUnclippedEnd(read));
|
||||||
|
|
||||||
|
|
||||||
|
//This tries to fix the bug where the deletion is counted a read base and as a result, the hardCLipper runs into
|
||||||
|
//an endless loop when hard clipping the cigar string because the read coordinates are not covered by the read
|
||||||
|
stop -= numDeletions(read);
|
||||||
|
if ( start > stop )
|
||||||
|
start -= numDeletions(read);
|
||||||
|
|
||||||
|
|
||||||
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
//System.out.println("Clipping start/stop: " + start + "/" + stop);
|
||||||
this.addOp(new ClippingOp(start, stop));
|
this.addOp(new ClippingOp(start, stop));
|
||||||
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
SAMRecord clippedRead = clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||||
|
|
|
||||||
|
|
@ -12,19 +12,35 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO FOR CHRIS HARTL
|
* Allows for reading in RefSeq information
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* Codec Description
|
* Parses a sorted UCSC RefSeq file (see below) into relevant features: the gene name, the unique gene name (if multiple transcrips get separate entries), exons, gene start/stop, coding start/stop,
|
||||||
|
* strandedness of transcription.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* See also: link to file specification
|
* Instructions for generating a RefSeq file for use with the RefSeq codec can be found on the Wiki here
|
||||||
|
* <a href="http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq">http://www.broadinstitute.org/gsa/wiki/index.php/RefSeq</a>
|
||||||
* </p>
|
* </p>
|
||||||
|
* <h2> Usage </h2>
|
||||||
|
* The RefSeq Rod can be bound as any other rod, and is specified by REFSEQ, for example
|
||||||
|
* <pre>
|
||||||
|
* -refSeqBinding:REFSEQ /path/to/refSeq.txt
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* You will need to consult individual walkers for the binding name ("refSeqBinding", above)
|
||||||
*
|
*
|
||||||
* <h2>File format example</h2>
|
* <h2>File format example</h2>
|
||||||
|
* If you want to define your own file for use, the format is (tab delimited):
|
||||||
|
* bin, name, chrom, strand, transcription start, transcription end, coding start, coding end, num exons, exon starts, exon ends, id, alt. name, coding start status (complete/incomplete), coding end status (complete,incomplete)
|
||||||
|
* and exon frames, for example:
|
||||||
|
* <pre>
|
||||||
|
* 76 NM_001011874 1 - 3204562 3661579 3206102 3661429 3 3204562,3411782,3660632, 3207049,3411982,3661579, 0 Xkr4 cmpl cmpl 1,2,0,
|
||||||
|
* </pre>
|
||||||
|
* for more information see <a href="http://skip.ucsc.edu/cgi-bin/hgTables?hgsid=5651&hgta_doSchemaDb=mm8&hgta_doSchemaTable=refGene">here</a>
|
||||||
* <p>
|
* <p>
|
||||||
* A BAM file containing <b>exactly one sample</b>.
|
*
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* @author Mark DePristo
|
* @author Mark DePristo
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.NameAwareCodec;
|
import org.broad.tribble.NameAwareCodec;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
|
import org.broad.tribble.util.BlockCompressedInputStream;
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
|
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -215,7 +216,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
||||||
|
|
||||||
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
|
// if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
|
||||||
if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
|
if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
|
||||||
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
(header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
|
||||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||||
" tokens, and saw " + nParts + " )", lineNo);
|
" tokens, and saw " + nParts + " )", lineNo);
|
||||||
|
|
@ -227,7 +228,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
throw new UserException.MalformedVCF(message, lineNo);
|
throw new UserException.MalformedVCF(message, lineNo);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void generateException(String message, int lineNo) {
|
protected static void generateException(String message, int lineNo) {
|
||||||
throw new UserException.MalformedVCF(message, lineNo);
|
throw new UserException.MalformedVCF(message, lineNo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -345,6 +346,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
generateException("The VCF specification requires a valid info field");
|
generateException("The VCF specification requires a valid info field");
|
||||||
|
|
||||||
if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) {
|
if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) {
|
||||||
|
if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 )
|
||||||
|
generateException("The VCF specification does not allow for whitespace in the INFO field");
|
||||||
|
|
||||||
int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR);
|
int infoValueSplitSize = ParsingUtils.split(infoField, infoValueArray, VCFConstants.INFO_FIELD_SEPARATOR_CHAR);
|
||||||
for (int i = 0; i < infoValueSplitSize; i++) {
|
for (int i = 0; i < infoValueSplitSize; i++) {
|
||||||
String key;
|
String key;
|
||||||
|
|
@ -587,7 +591,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
|
public final static boolean canDecodeFile(final File potentialInput, final String MAGIC_HEADER_LINE) {
|
||||||
try {
|
try {
|
||||||
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
|
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
|
||||||
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
|
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) ||
|
||||||
|
isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
|
||||||
} catch ( FileNotFoundException e ) {
|
} catch ( FileNotFoundException e ) {
|
||||||
return false;
|
return false;
|
||||||
} catch ( IOException e ) {
|
} catch ( IOException e ) {
|
||||||
|
|
@ -598,12 +603,17 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
|
||||||
private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) {
|
private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) {
|
||||||
try {
|
try {
|
||||||
byte[] buff = new byte[MAGIC_HEADER_LINE.length()];
|
byte[] buff = new byte[MAGIC_HEADER_LINE.length()];
|
||||||
stream.read(buff, 0, MAGIC_HEADER_LINE.length());
|
int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length());
|
||||||
String firstLine = new String(buff);
|
boolean eq = Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes());
|
||||||
stream.close();
|
return eq;
|
||||||
return firstLine.startsWith(MAGIC_HEADER_LINE);
|
// String firstLine = new String(buff);
|
||||||
|
// return firstLine.startsWith(MAGIC_HEADER_LINE);
|
||||||
} catch ( IOException e ) {
|
} catch ( IOException e ) {
|
||||||
return false;
|
return false;
|
||||||
|
} catch ( RuntimeException e ) {
|
||||||
|
return false;
|
||||||
|
} finally {
|
||||||
|
try { stream.close(); } catch ( IOException e ) {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,144 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import org.broad.tribble.Tribble;
|
||||||
|
import org.broad.tribble.TribbleException;
|
||||||
|
import org.broad.tribble.index.DynamicIndexCreator;
|
||||||
|
import org.broad.tribble.index.Index;
|
||||||
|
import org.broad.tribble.index.IndexFactory;
|
||||||
|
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||||
|
import org.broad.tribble.util.PositionalStream;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.tracks.IndexDictionaryUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* this class writes VCF files
|
||||||
|
*/
|
||||||
|
public abstract class IndexingVCFWriter implements VCFWriter {
|
||||||
|
final private String name;
|
||||||
|
private final SAMSequenceDictionary refDict;
|
||||||
|
|
||||||
|
private OutputStream outputStream;
|
||||||
|
private PositionalStream positionalStream = null;
|
||||||
|
private DynamicIndexCreator indexer = null;
|
||||||
|
private LittleEndianOutputStream idxStream = null;
|
||||||
|
|
||||||
|
@Requires({"name != null",
|
||||||
|
"! ( location == null && output == null )",
|
||||||
|
"! ( enableOnTheFlyIndexing && location == null )"})
|
||||||
|
protected IndexingVCFWriter(final String name, final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing) {
|
||||||
|
outputStream = output;
|
||||||
|
this.name = name;
|
||||||
|
this.refDict = refDict;
|
||||||
|
|
||||||
|
if ( enableOnTheFlyIndexing ) {
|
||||||
|
try {
|
||||||
|
idxStream = new LittleEndianOutputStream(new FileOutputStream(Tribble.indexFile(location)));
|
||||||
|
//System.out.println("Creating index on the fly for " + location);
|
||||||
|
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||||
|
indexer.initialize(location, indexer.defaultBinSize());
|
||||||
|
positionalStream = new PositionalStream(output);
|
||||||
|
outputStream = positionalStream;
|
||||||
|
} catch ( IOException ex ) {
|
||||||
|
// No matter what we keep going, since we don't care if we can't create the index file
|
||||||
|
idxStream = null;
|
||||||
|
indexer = null;
|
||||||
|
positionalStream = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Ensures("result != null")
|
||||||
|
public OutputStream getOutputStream() {
|
||||||
|
return outputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Ensures("result != null")
|
||||||
|
public String getStreamName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract void writeHeader(VCFHeader header);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* attempt to close the VCF file
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
// try to close the index stream (keep it separate to help debugging efforts)
|
||||||
|
if ( indexer != null ) {
|
||||||
|
try {
|
||||||
|
Index index = indexer.finalizeIndex(positionalStream.getPosition());
|
||||||
|
IndexDictionaryUtils.setIndexSequenceDictionary(index, refDict);
|
||||||
|
index.write(idxStream);
|
||||||
|
idxStream.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ReviewedStingException("Unable to close index for " + getStreamName(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a record to the file
|
||||||
|
*
|
||||||
|
* @param vc the Variant Context object
|
||||||
|
*/
|
||||||
|
public void add(VariantContext vc) {
|
||||||
|
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
|
||||||
|
if ( indexer != null )
|
||||||
|
indexer.addFeature(vc, positionalStream.getPosition());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a reasonable "name" for this writer, to display to the user if something goes wrong
|
||||||
|
*
|
||||||
|
* @param location
|
||||||
|
* @param stream
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected static final String writerName(final File location, final OutputStream stream) {
|
||||||
|
return location == null ? stream.toString() : location.getAbsolutePath();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a output stream writing to location, or throws a UserException if this fails
|
||||||
|
* @param location
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected static OutputStream openOutputStream(final File location) {
|
||||||
|
try {
|
||||||
|
return new FileOutputStream(location);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(location, "Unable to create VCF writer", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broad.tribble.Tribble;
|
import org.broad.tribble.Tribble;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.index.DynamicIndexCreator;
|
import org.broad.tribble.index.DynamicIndexCreator;
|
||||||
|
|
@ -44,46 +45,30 @@ import java.util.*;
|
||||||
/**
|
/**
|
||||||
* this class writes VCF files
|
* this class writes VCF files
|
||||||
*/
|
*/
|
||||||
public class StandardVCFWriter implements VCFWriter {
|
public class StandardVCFWriter extends IndexingVCFWriter {
|
||||||
|
// the print stream we're writing to
|
||||||
|
final protected BufferedWriter mWriter;
|
||||||
|
|
||||||
|
// should we write genotypes or just sites?
|
||||||
|
final protected boolean doNotWriteGenotypes;
|
||||||
|
|
||||||
// the VCF header we're storing
|
// the VCF header we're storing
|
||||||
protected VCFHeader mHeader = null;
|
protected VCFHeader mHeader = null;
|
||||||
|
|
||||||
// the print stream we're writing to
|
|
||||||
protected BufferedWriter mWriter;
|
|
||||||
protected PositionalStream positionalStream = null;
|
|
||||||
|
|
||||||
// were filters applied?
|
// were filters applied?
|
||||||
protected boolean filtersWereAppliedToContext = false;
|
protected boolean filtersWereAppliedToContext = false;
|
||||||
|
|
||||||
// should we write genotypes or just sites?
|
|
||||||
protected boolean doNotWriteGenotypes = false;
|
|
||||||
|
|
||||||
protected DynamicIndexCreator indexer = null;
|
|
||||||
protected File indexFile = null;
|
|
||||||
LittleEndianOutputStream idxStream = null;
|
|
||||||
File location = null;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a VCF writer, given a file to write to
|
* create a VCF writer, given a file to write to
|
||||||
*
|
*
|
||||||
* @param location the file location to write to
|
* @param location the file location to write to
|
||||||
*/
|
*/
|
||||||
public StandardVCFWriter(File location) {
|
public StandardVCFWriter(final File location, final SAMSequenceDictionary refDict) {
|
||||||
this(location, openOutputStream(location), true, false);
|
this(location, openOutputStream(location), refDict, true, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
public StandardVCFWriter(File location, boolean enableOnTheFlyIndexing) {
|
public StandardVCFWriter(File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing) {
|
||||||
this(location, openOutputStream(location), enableOnTheFlyIndexing, false);
|
this(location, openOutputStream(location), refDict, enableOnTheFlyIndexing, false);
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create a VCF writer, given a stream to write to
|
|
||||||
*
|
|
||||||
* @param output the file location to write to
|
|
||||||
*/
|
|
||||||
public StandardVCFWriter(OutputStream output) {
|
|
||||||
this(output, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -92,33 +77,23 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
* @param output the file location to write to
|
* @param output the file location to write to
|
||||||
* @param doNotWriteGenotypes do not write genotypes
|
* @param doNotWriteGenotypes do not write genotypes
|
||||||
*/
|
*/
|
||||||
public StandardVCFWriter(OutputStream output, boolean doNotWriteGenotypes) {
|
public StandardVCFWriter(final OutputStream output, final SAMSequenceDictionary refDict, final boolean doNotWriteGenotypes) {
|
||||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
this(null, output, refDict, false, doNotWriteGenotypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
public StandardVCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
||||||
|
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
|
||||||
|
mWriter = new BufferedWriter(new OutputStreamWriter(getOutputStream())); // todo -- fix buffer size
|
||||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
public StandardVCFWriter(File location, OutputStream output, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
// --------------------------------------------------------------------------------
|
||||||
this.location = location;
|
//
|
||||||
|
// VCFWriter interface functions
|
||||||
if ( enableOnTheFlyIndexing ) {
|
//
|
||||||
indexFile = Tribble.indexFile(location);
|
// --------------------------------------------------------------------------------
|
||||||
try {
|
|
||||||
idxStream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
|
||||||
//System.out.println("Creating index on the fly for " + location);
|
|
||||||
indexer = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
|
||||||
indexer.initialize(location, indexer.defaultBinSize());
|
|
||||||
positionalStream = new PositionalStream(output);
|
|
||||||
output = positionalStream;
|
|
||||||
} catch ( IOException ex ) {
|
|
||||||
// No matter what we keep going, since we don't care if we can't create the index file
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//mWriter = new BufferedWriter(new OutputStreamWriter(new PositionalStream(output)));
|
|
||||||
mWriter = new BufferedWriter(new OutputStreamWriter(output));
|
|
||||||
this.doNotWriteGenotypes = doNotWriteGenotypes;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
@Override
|
||||||
public void writeHeader(VCFHeader header) {
|
public void writeHeader(VCFHeader header) {
|
||||||
mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
|
mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
|
||||||
|
|
||||||
|
|
@ -158,44 +133,24 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||||
}
|
}
|
||||||
catch (IOException e) {
|
catch (IOException e) {
|
||||||
throw new TribbleException("IOException writing the VCF header to " + locationString(), e);
|
throw new ReviewedStingException("IOException writing the VCF header to " + getStreamName(), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String locationString() {
|
|
||||||
return location == null ? mWriter.toString() : location.getAbsolutePath();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* attempt to close the VCF file
|
* attempt to close the VCF file
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public void close() {
|
public void close() {
|
||||||
// try to close the vcf stream
|
// try to close the vcf stream
|
||||||
try {
|
try {
|
||||||
mWriter.flush();
|
mWriter.flush();
|
||||||
mWriter.close();
|
mWriter.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new TribbleException("Unable to close " + locationString() + " because of " + e.getMessage());
|
throw new ReviewedStingException("Unable to close " + getStreamName(), e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// try to close the index stream (keep it separate to help debugging efforts)
|
super.close();
|
||||||
if ( indexer != null ) {
|
|
||||||
try {
|
|
||||||
Index index = indexer.finalizeIndex(positionalStream.getPosition());
|
|
||||||
index.write(idxStream);
|
|
||||||
idxStream.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new TribbleException("Unable to close index for " + locationString() + " because of " + e.getMessage());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static OutputStream openOutputStream(File location) {
|
|
||||||
try {
|
|
||||||
return new FileOutputStream(location);
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new TribbleException("Unable to create VCF file at location: " + location);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -203,28 +158,17 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
*
|
*
|
||||||
* @param vc the Variant Context object
|
* @param vc the Variant Context object
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public void add(VariantContext vc) {
|
public void add(VariantContext vc) {
|
||||||
add(vc, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* add a record to the file
|
|
||||||
*
|
|
||||||
* @param vc the Variant Context object
|
|
||||||
* @param refBaseShouldBeAppliedToEndOfAlleles *** THIS SHOULD BE FALSE EXCEPT FOR AN INDEL AT THE EXTREME BEGINNING OF A CONTIG (WHERE THERE IS NO PREVIOUS BASE, SO WE USE THE BASE AFTER THE EVENT INSTEAD)
|
|
||||||
*/
|
|
||||||
public void add(VariantContext vc, boolean refBaseShouldBeAppliedToEndOfAlleles) {
|
|
||||||
if ( mHeader == null )
|
if ( mHeader == null )
|
||||||
throw new IllegalStateException("The VCF Header must be written before records can be added: " + locationString());
|
throw new IllegalStateException("The VCF Header must be written before records can be added: " + getStreamName());
|
||||||
|
|
||||||
if ( doNotWriteGenotypes )
|
if ( doNotWriteGenotypes )
|
||||||
vc = VariantContext.modifyGenotypes(vc, null);
|
vc = VariantContext.modifyGenotypes(vc, null);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, refBaseShouldBeAppliedToEndOfAlleles);
|
vc = VariantContext.createVariantContextWithPaddedAlleles(vc, false);
|
||||||
|
super.add(vc);
|
||||||
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
|
|
||||||
if ( indexer != null ) indexer.addFeature(vc, positionalStream.getPosition());
|
|
||||||
|
|
||||||
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
|
Map<Allele, String> alleleMap = new HashMap<Allele, String>(vc.getAlleles().size());
|
||||||
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
|
alleleMap.put(Allele.NO_CALL, VCFConstants.EMPTY_ALLELE); // convenience for lookup
|
||||||
|
|
@ -275,7 +219,7 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||||
|
|
||||||
// FILTER
|
// FILTER
|
||||||
String filters = vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (filtersWereAppliedToContext || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
String filters = getFilterString(vc, filtersWereAppliedToContext);
|
||||||
mWriter.write(filters);
|
mWriter.write(filters);
|
||||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||||
|
|
||||||
|
|
@ -317,9 +261,22 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
mWriter.write("\n");
|
mWriter.write("\n");
|
||||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("Unable to write the VCF object to " + locationString());
|
throw new RuntimeException("Unable to write the VCF object to " + getStreamName());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// implementation functions
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
public static final String getFilterString(final VariantContext vc) {
|
||||||
|
return getFilterString(vc, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final String getFilterString(final VariantContext vc, boolean forcePASS) {
|
||||||
|
return vc.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters())) : (forcePASS || vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getQualValue(double qual) {
|
private String getQualValue(double qual) {
|
||||||
|
|
@ -462,7 +419,7 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
mWriter.write(encoding);
|
mWriter.write(encoding);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String formatVCFField(Object val) {
|
public static String formatVCFField(Object val) {
|
||||||
String result;
|
String result;
|
||||||
if ( val == null )
|
if ( val == null )
|
||||||
result = VCFConstants.MISSING_VALUE_v4;
|
result = VCFConstants.MISSING_VALUE_v4;
|
||||||
|
|
@ -524,12 +481,11 @@ public class StandardVCFWriter implements VCFWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static int countOccurrences(char c, String s) {
|
private static int countOccurrences(char c, String s) {
|
||||||
int count = 0;
|
int count = 0;
|
||||||
for (int i = 0; i < s.length(); i++) {
|
for (int i = 0; i < s.length(); i++) {
|
||||||
count += s.charAt(i) == c ? 1 : 0;
|
count += s.charAt(i) == c ? 1 : 0;
|
||||||
}
|
}
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -105,34 +105,37 @@ public class VCFCodec extends AbstractVCFCodec {
|
||||||
* @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF)
|
* @return a set of the filters applied or null if filters were not applied to the record (e.g. as per the missing value in a VCF)
|
||||||
*/
|
*/
|
||||||
protected Set<String> parseFilters(String filterString) {
|
protected Set<String> parseFilters(String filterString) {
|
||||||
|
return parseFilters(filterHash, lineNo, filterString);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Set<String> parseFilters(final Map<String, LinkedHashSet<String>> cache, final int lineNo, final String filterString) {
|
||||||
// null for unfiltered
|
// null for unfiltered
|
||||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// empty set for passes filters
|
|
||||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
|
||||||
|
|
||||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
|
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
|
||||||
return fFields;
|
return Collections.emptySet();
|
||||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
||||||
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4");
|
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
|
||||||
if ( filterString.length() == 0 )
|
if ( filterString.length() == 0 )
|
||||||
generateException("The VCF specification requires a valid filter status");
|
generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
|
||||||
|
|
||||||
// do we have the filter string cached?
|
// do we have the filter string cached?
|
||||||
if ( filterHash.containsKey(filterString) )
|
if ( cache != null && cache.containsKey(filterString) )
|
||||||
return filterHash.get(filterString);
|
return Collections.unmodifiableSet(cache.get(filterString));
|
||||||
|
|
||||||
|
// empty set for passes filters
|
||||||
|
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||||
// otherwise we have to parse and cache the value
|
// otherwise we have to parse and cache the value
|
||||||
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
||||||
fFields.add(filterString);
|
fFields.add(filterString);
|
||||||
else
|
else
|
||||||
fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
|
fFields.addAll(Arrays.asList(filterString.split(VCFConstants.FILTER_CODE_SEPARATOR)));
|
||||||
|
|
||||||
filterHash.put(filterString, fFields);
|
fFields = fFields;
|
||||||
|
if ( cache != null ) cache.put(filterString, fFields);
|
||||||
|
|
||||||
return fFields;
|
return Collections.unmodifiableSet(fFields);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,256 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.gcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GATK binary VCF record
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class GCF {
|
||||||
|
private final static int RECORD_TERMINATOR = 123456789;
|
||||||
|
private int chromOffset;
|
||||||
|
private int start, stop;
|
||||||
|
private String id;
|
||||||
|
private List<Allele> alleleMap;
|
||||||
|
private int alleleOffsets[];
|
||||||
|
private float qual;
|
||||||
|
private byte refPad;
|
||||||
|
private String info;
|
||||||
|
private int filterOffset;
|
||||||
|
|
||||||
|
private List<GCFGenotype> genotypes = Collections.emptyList();
|
||||||
|
|
||||||
|
public GCF(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc, boolean skipGenotypes) {
|
||||||
|
chromOffset = GCFHeaderBuilder.encodeString(vc.getChr());
|
||||||
|
start = vc.getStart();
|
||||||
|
stop = vc.getEnd();
|
||||||
|
refPad = vc.hasReferenceBaseForIndel() ? vc.getReferenceBaseForIndel() : 0;
|
||||||
|
id = vc.getID();
|
||||||
|
|
||||||
|
// encode alleles
|
||||||
|
alleleMap = new ArrayList<Allele>(vc.getNAlleles());
|
||||||
|
alleleOffsets = new int[vc.getNAlleles()];
|
||||||
|
alleleMap.add(vc.getReference());
|
||||||
|
alleleOffsets[0] = GCFHeaderBuilder.encodeAllele(vc.getReference());
|
||||||
|
for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
|
||||||
|
alleleMap.add(vc.getAlternateAllele(i));
|
||||||
|
alleleOffsets[i+1] = GCFHeaderBuilder.encodeAllele(vc.getAlternateAllele(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
qual = (float)vc.getNegLog10PError(); //qualToByte(vc.getPhredScaledQual());
|
||||||
|
info = infoFieldString(vc, GCFHeaderBuilder);
|
||||||
|
filterOffset = GCFHeaderBuilder.encodeString(StandardVCFWriter.getFilterString(vc));
|
||||||
|
|
||||||
|
if ( ! skipGenotypes ) {
|
||||||
|
genotypes = encodeGenotypes(GCFHeaderBuilder, vc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public GCF(DataInputStream inputStream, boolean skipGenotypes) throws IOException, EOFException {
|
||||||
|
chromOffset = inputStream.readInt();
|
||||||
|
|
||||||
|
// have we reached the footer?
|
||||||
|
if ( chromOffset == GCFHeader.FOOTER_START_MARKER )
|
||||||
|
throw new EOFException();
|
||||||
|
|
||||||
|
start = inputStream.readInt();
|
||||||
|
stop = inputStream.readInt();
|
||||||
|
id = inputStream.readUTF();
|
||||||
|
refPad = inputStream.readByte();
|
||||||
|
alleleOffsets = readIntArray(inputStream);
|
||||||
|
qual = inputStream.readFloat();
|
||||||
|
info = inputStream.readUTF();
|
||||||
|
filterOffset = inputStream.readInt();
|
||||||
|
|
||||||
|
int nGenotypes = inputStream.readInt();
|
||||||
|
int sizeOfGenotypes = inputStream.readInt();
|
||||||
|
if ( skipGenotypes ) {
|
||||||
|
genotypes = Collections.emptyList();
|
||||||
|
inputStream.skipBytes(sizeOfGenotypes);
|
||||||
|
} else {
|
||||||
|
genotypes = new ArrayList<GCFGenotype>(nGenotypes);
|
||||||
|
for ( int i = 0; i < nGenotypes; i++ )
|
||||||
|
genotypes.add(new GCFGenotype(this, inputStream));
|
||||||
|
}
|
||||||
|
|
||||||
|
int recordDone = inputStream.readInt();
|
||||||
|
if ( recordDone != RECORD_TERMINATOR )
|
||||||
|
throw new UserException.MalformedFile("Record not terminated by RECORD_TERMINATOR key");
|
||||||
|
}
|
||||||
|
|
||||||
|
public int write(DataOutputStream outputStream) throws IOException {
|
||||||
|
int startSize = outputStream.size();
|
||||||
|
outputStream.writeInt(chromOffset);
|
||||||
|
outputStream.writeInt(start);
|
||||||
|
outputStream.writeInt(stop);
|
||||||
|
outputStream.writeUTF(id);
|
||||||
|
outputStream.writeByte(refPad);
|
||||||
|
writeIntArray(alleleOffsets, outputStream, true);
|
||||||
|
outputStream.writeFloat(qual);
|
||||||
|
outputStream.writeUTF(info);
|
||||||
|
outputStream.writeInt(filterOffset);
|
||||||
|
|
||||||
|
int nGenotypes = genotypes.size();
|
||||||
|
int expectedSizeOfGenotypes = nGenotypes == 0 ? 0 : genotypes.get(0).sizeInBytes() * nGenotypes;
|
||||||
|
outputStream.writeInt(nGenotypes);
|
||||||
|
outputStream.writeInt(expectedSizeOfGenotypes);
|
||||||
|
int obsSizeOfGenotypes = 0;
|
||||||
|
for ( GCFGenotype g : genotypes )
|
||||||
|
obsSizeOfGenotypes += g.write(outputStream);
|
||||||
|
if ( obsSizeOfGenotypes != expectedSizeOfGenotypes )
|
||||||
|
throw new RuntimeException("Expect and observed genotype sizes disagree! expect = " + expectedSizeOfGenotypes + " obs =" + obsSizeOfGenotypes);
|
||||||
|
|
||||||
|
outputStream.writeInt(RECORD_TERMINATOR);
|
||||||
|
return outputStream.size() - startSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public VariantContext decode(final String source, final GCFHeader header) {
|
||||||
|
final String contig = header.getString(chromOffset);
|
||||||
|
alleleMap = header.getAlleles(alleleOffsets);
|
||||||
|
double negLog10PError = qual; // QualityUtils.qualToErrorProb(qual);
|
||||||
|
Set<String> filters = header.getFilters(filterOffset);
|
||||||
|
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||||
|
attributes.put("INFO", info);
|
||||||
|
Byte refPadByte = refPad == 0 ? null : refPad;
|
||||||
|
Map<String, Genotype> genotypes = decodeGenotypes(header);
|
||||||
|
|
||||||
|
return new VariantContext(source, contig, start, stop, alleleMap, genotypes, negLog10PError, filters, attributes, refPadByte);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Genotype> decodeGenotypes(final GCFHeader header) {
|
||||||
|
if ( genotypes.isEmpty() )
|
||||||
|
return VariantContext.NO_GENOTYPES;
|
||||||
|
else {
|
||||||
|
Map<String, Genotype> map = new TreeMap<String, Genotype>();
|
||||||
|
|
||||||
|
for ( int i = 0; i < genotypes.size(); i++ ) {
|
||||||
|
final String sampleName = header.getSample(i);
|
||||||
|
final Genotype g = genotypes.get(i).decode(sampleName, header, this, alleleMap);
|
||||||
|
map.put(sampleName, g);
|
||||||
|
}
|
||||||
|
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<GCFGenotype> encodeGenotypes(final GCFHeaderBuilder GCFHeaderBuilder, final VariantContext vc) {
|
||||||
|
int nGenotypes = vc.getNSamples();
|
||||||
|
if ( nGenotypes > 0 ) {
|
||||||
|
List<GCFGenotype> genotypes = new ArrayList<GCFGenotype>(nGenotypes);
|
||||||
|
for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null);
|
||||||
|
|
||||||
|
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||||
|
int i = GCFHeaderBuilder.encodeSample(g.getSampleName());
|
||||||
|
genotypes.set(i, new GCFGenotype(GCFHeaderBuilder, alleleMap, g));
|
||||||
|
}
|
||||||
|
|
||||||
|
return genotypes;
|
||||||
|
} else {
|
||||||
|
return Collections.emptyList();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getNAlleles() { return alleleOffsets.length; }
|
||||||
|
|
||||||
|
|
||||||
|
private final String infoFieldString(VariantContext vc, final GCFHeaderBuilder GCFHeaderBuilder) {
|
||||||
|
StringBuilder s = new StringBuilder();
|
||||||
|
|
||||||
|
boolean first = true;
|
||||||
|
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
|
||||||
|
String key = field.getKey();
|
||||||
|
if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
|
||||||
|
continue;
|
||||||
|
int stringIndex = GCFHeaderBuilder.encodeString(key);
|
||||||
|
String outputValue = StandardVCFWriter.formatVCFField(field.getValue());
|
||||||
|
if ( outputValue != null ) {
|
||||||
|
if ( ! first ) s.append(";");
|
||||||
|
s.append(stringIndex).append("=").append(outputValue);
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final static int BUFFER_SIZE = 1048576; // 2**20
|
||||||
|
|
||||||
|
public static DataInputStream createDataInputStream(final InputStream stream) {
|
||||||
|
return new DataInputStream(new BufferedInputStream(stream, BUFFER_SIZE));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static FileInputStream createFileInputStream(final File file) throws FileNotFoundException {
|
||||||
|
return new FileInputStream(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final static int[] readIntArray(final DataInputStream inputStream) throws IOException {
|
||||||
|
return readIntArray(inputStream, inputStream.readInt());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final static int[] readIntArray(final DataInputStream inputStream, int size) throws IOException {
|
||||||
|
int[] array = new int[size];
|
||||||
|
for ( int i = 0; i < array.length; i++ )
|
||||||
|
array[i] = inputStream.readInt();
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final static void writeIntArray(int[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException {
|
||||||
|
if ( writeSize ) outputStream.writeInt(array.length);
|
||||||
|
for ( int i : array )
|
||||||
|
outputStream.writeInt(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final static byte[] readByteArray(final DataInputStream inputStream) throws IOException {
|
||||||
|
return readByteArray(inputStream, inputStream.readInt());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final static byte[] readByteArray(final DataInputStream inputStream, int size) throws IOException {
|
||||||
|
byte[] array = new byte[size];
|
||||||
|
for ( int i = 0; i < array.length; i++ )
|
||||||
|
array[i] = inputStream.readByte();
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final static void writeByteArray(byte[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException {
|
||||||
|
if ( writeSize ) outputStream.writeInt(array.length);
|
||||||
|
for ( byte i : array )
|
||||||
|
outputStream.writeByte(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final static byte qualToByte(double phredScaledQual) {
|
||||||
|
return (byte)Math.round(Math.min(phredScaledQual, 255));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,147 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.gcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
|
||||||
|
import java.io.DataInputStream;
|
||||||
|
import java.io.DataOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GATK binary VCF record
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class GCFGenotype {
|
||||||
|
private byte gq;
|
||||||
|
private int gt;
|
||||||
|
private int dp;
|
||||||
|
private int ad[];
|
||||||
|
private byte[] pl;
|
||||||
|
|
||||||
|
// todo -- what to do about phasing? Perhaps we shouldn't support it
|
||||||
|
// todo -- is the FL field generic or just a flag? Should we even support per sample filtering?
|
||||||
|
|
||||||
|
public GCFGenotype(final GCFHeaderBuilder GCFHeaderBuilder, final List<Allele> allAlleles, Genotype genotype) {
|
||||||
|
gq = GCF.qualToByte(genotype.getPhredScaledQual());
|
||||||
|
gt = encodeAlleles(genotype.getAlleles(), allAlleles);
|
||||||
|
|
||||||
|
dp = genotype.getAttributeAsInt("DP", 0);
|
||||||
|
|
||||||
|
int nAlleles = allAlleles.size();
|
||||||
|
ad = new int[nAlleles];
|
||||||
|
|
||||||
|
int npls = nAllelesToNPls(nAlleles);
|
||||||
|
pl = new byte[npls];
|
||||||
|
}
|
||||||
|
|
||||||
|
private int nAllelesToNPls( int nAlleles ) {
|
||||||
|
return nAlleles*(nAlleles+1) / 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GCFGenotype(GCF GCF, DataInputStream inputStream) throws IOException {
|
||||||
|
int gqInt = inputStream.readUnsignedByte();
|
||||||
|
gq = (byte)gqInt;
|
||||||
|
gt = inputStream.readInt();
|
||||||
|
dp = inputStream.readInt();
|
||||||
|
ad = GCF.readIntArray(inputStream, GCF.getNAlleles());
|
||||||
|
pl = GCF.readByteArray(inputStream, nAllelesToNPls(GCF.getNAlleles()));
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2 alleles => 1 + 8 + 8 + 3 => 20
|
||||||
|
protected int sizeInBytes() {
|
||||||
|
return 1 // gq
|
||||||
|
+ 4 * 2 // gt + dp
|
||||||
|
+ 4 * ad.length // ad
|
||||||
|
+ 1 * pl.length; // pl
|
||||||
|
}
|
||||||
|
|
||||||
|
public Genotype decode(final String sampleName, final GCFHeader header, GCF GCF, List<Allele> alleleIndex) {
|
||||||
|
final List<Allele> alleles = decodeAlleles(gt, alleleIndex);
|
||||||
|
final double negLog10PError = gq / 10.0;
|
||||||
|
final Set<String> filters = Collections.emptySet();
|
||||||
|
final Map<String, Object> attributes = new HashMap<String, Object>();
|
||||||
|
attributes.put("DP", dp);
|
||||||
|
attributes.put("AD", ad);
|
||||||
|
attributes.put("PL", pl);
|
||||||
|
|
||||||
|
return new Genotype(sampleName, alleles, negLog10PError, filters, attributes, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int encodeAlleles(List<Allele> gtList, List<Allele> allAlleles) {
|
||||||
|
final int nAlleles = gtList.size();
|
||||||
|
if ( nAlleles > 4 )
|
||||||
|
throw new IllegalArgumentException("encodeAlleles doesn't support more than 4 alt alleles, but I saw " + gtList);
|
||||||
|
|
||||||
|
int gtInt = 0;
|
||||||
|
for ( int i = 0; i < nAlleles ; i++ ) {
|
||||||
|
final int bitOffset = i * 8;
|
||||||
|
final int allelei = getAlleleIndex(gtList.get(i), allAlleles);
|
||||||
|
final int gti = (allelei + 1) << bitOffset;
|
||||||
|
gtInt = gtInt | gti;
|
||||||
|
}
|
||||||
|
|
||||||
|
return gtInt;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int getAlleleIndex(Allele q, List<Allele> allAlleles) {
|
||||||
|
if ( q.isNoCall() )
|
||||||
|
return 254;
|
||||||
|
for ( int i = 0; i < allAlleles.size(); i++ )
|
||||||
|
if ( q.equals(allAlleles.get(i)) )
|
||||||
|
return i;
|
||||||
|
throw new IllegalStateException("getAlleleIndex passed allele not in map! allele " + q + " allAlleles " + allAlleles);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Allele> decodeAlleles(int gtInt, List<Allele> alleleIndex) {
|
||||||
|
List<Allele> alleles = new ArrayList<Allele>(4);
|
||||||
|
|
||||||
|
for ( int i = 0; i < 32; i += 8 ) {
|
||||||
|
final int gi = (gtInt & (0x000000FF << i)) >> i;
|
||||||
|
if ( gi != 0 ) {
|
||||||
|
final int allelei = gi - 1;
|
||||||
|
alleles.add( allelei == 254 ? Allele.NO_CALL : alleleIndex.get(allelei) );
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return alleles;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int write(DataOutputStream outputStream) throws IOException {
|
||||||
|
int startSize = outputStream.size();
|
||||||
|
outputStream.writeByte(gq);
|
||||||
|
outputStream.writeInt(gt);
|
||||||
|
outputStream.writeInt(dp);
|
||||||
|
GCF.writeIntArray(ad, outputStream, false);
|
||||||
|
GCF.writeByteArray(pl, outputStream, false);
|
||||||
|
return outputStream.size() - startSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,205 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.gcf;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [Short one sentence description of this walker]
|
||||||
|
* <p/>
|
||||||
|
* <p>
|
||||||
|
* [Functionality of this walker]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* [Input description]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* [Output description]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java
|
||||||
|
* -jar GenomeAnalysisTK.jar
|
||||||
|
* -T $WalkerName
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class GCFHeader {
|
||||||
|
final protected static Logger logger = Logger.getLogger(GCFHeader.class);
|
||||||
|
|
||||||
|
public final static int GCF_VERSION = 1;
|
||||||
|
public final static byte[] GCF_FILE_START_MARKER = "GCF\1".getBytes();
|
||||||
|
public final static int FOOTER_START_MARKER = -1;
|
||||||
|
public final static long HEADER_FORWARD_REFERENCE_OFFSET = GCF_FILE_START_MARKER.length + 4; // for the version
|
||||||
|
|
||||||
|
final int version;
|
||||||
|
long footerPosition;
|
||||||
|
final List<Allele> alleles;
|
||||||
|
final List<String> strings;
|
||||||
|
final List<String> samples;
|
||||||
|
final List<Set<String>> filters;
|
||||||
|
|
||||||
|
public GCFHeader(final Map<Allele, Integer> allelesIn, final Map<String, Integer> stringIn, final Map<String, Integer> samplesIn) {
|
||||||
|
version = GCF_VERSION;
|
||||||
|
footerPosition = 0;
|
||||||
|
this.alleles = linearize(allelesIn);
|
||||||
|
this.strings = linearize(stringIn);
|
||||||
|
this.samples = linearize(samplesIn);
|
||||||
|
this.filters = null; // not used with this constructor
|
||||||
|
}
|
||||||
|
|
||||||
|
public GCFHeader(FileInputStream fileInputStream) throws IOException {
|
||||||
|
DataInputStream inputStream = new DataInputStream(fileInputStream);
|
||||||
|
byte[] headerTest = new byte[GCF_FILE_START_MARKER.length];
|
||||||
|
inputStream.read(headerTest);
|
||||||
|
if ( ! Arrays.equals(headerTest, GCF_FILE_START_MARKER) ) {
|
||||||
|
throw new UserException("Could not read GVCF file. GCF_FILE_START_MARKER missing. Saw " + new String(headerTest));
|
||||||
|
} else {
|
||||||
|
version = inputStream.readInt();
|
||||||
|
logger.info("Read GCF version " + version);
|
||||||
|
footerPosition = inputStream.readLong();
|
||||||
|
logger.info("Read footer position of " + footerPosition);
|
||||||
|
long lastPos = fileInputStream.getChannel().position();
|
||||||
|
logger.info(" Last position is " + lastPos);
|
||||||
|
|
||||||
|
// seek to the footer
|
||||||
|
fileInputStream.getChannel().position(footerPosition);
|
||||||
|
if ( inputStream.readInt() != FOOTER_START_MARKER )
|
||||||
|
throw new UserException.MalformedFile("Malformed GCF file: couldn't find the footer marker");
|
||||||
|
alleles = stringsToAlleles(readStrings(inputStream));
|
||||||
|
strings = readStrings(inputStream);
|
||||||
|
samples = readStrings(inputStream);
|
||||||
|
logger.info(String.format("Allele map of %d elements", alleles.size()));
|
||||||
|
logger.info(String.format("String map of %d elements", strings.size()));
|
||||||
|
logger.info(String.format("Sample map of %d elements", samples.size()));
|
||||||
|
filters = initializeFilterCache();
|
||||||
|
fileInputStream.getChannel().position(lastPos);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int writeHeader(final DataOutputStream outputStream) throws IOException {
|
||||||
|
int startBytes = outputStream.size();
|
||||||
|
outputStream.write(GCF_FILE_START_MARKER);
|
||||||
|
outputStream.writeInt(GCF_VERSION);
|
||||||
|
outputStream.writeLong(0);
|
||||||
|
return outputStream.size() - startBytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int writeFooter(final DataOutputStream outputStream) throws IOException {
|
||||||
|
int startBytes = outputStream.size();
|
||||||
|
outputStream.writeInt(FOOTER_START_MARKER); // has to be the same as chrom encoding
|
||||||
|
write(outputStream, allelesToStrings(alleles));
|
||||||
|
write(outputStream, strings);
|
||||||
|
write(outputStream, samples);
|
||||||
|
return outputStream.size() - startBytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void write(DataOutputStream outputStream, List<String> l) throws IOException {
|
||||||
|
outputStream.writeInt(l.size());
|
||||||
|
for ( String elt : l ) outputStream.writeUTF(elt);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> allelesToStrings(List<Allele> alleles) {
|
||||||
|
List<String> strings = new ArrayList<String>(alleles.size());
|
||||||
|
for ( Allele allele : alleles ) strings.add(allele.toString());
|
||||||
|
return strings;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Set<String>> initializeFilterCache() {
|
||||||
|
// required to allow offset -> set lookup
|
||||||
|
List<Set<String>> l = new ArrayList<Set<String>>(strings.size());
|
||||||
|
for ( int i = 0; i < strings.size(); i++ ) l.add(null);
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<Allele> stringsToAlleles(final List<String> strings) {
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>(strings.size());
|
||||||
|
for ( String string : strings ) {
|
||||||
|
boolean isRef = string.endsWith("*");
|
||||||
|
if ( isRef ) string = string.substring(0, string.length() - 1);
|
||||||
|
alleles.add(Allele.create(string, isRef));
|
||||||
|
}
|
||||||
|
return alleles;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static List<String> readStrings(final DataInputStream inputStream) throws IOException {
|
||||||
|
final int nStrings = inputStream.readInt();
|
||||||
|
|
||||||
|
final List<String> strings = new ArrayList<String>(nStrings);
|
||||||
|
for ( int i = 0; i < nStrings; i++ ) {
|
||||||
|
strings.add(inputStream.readUTF());
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static <T> List<T> linearize(final Map<T, Integer> map) {
|
||||||
|
final ArrayList<T> l = new ArrayList<T>(map.size());
|
||||||
|
for ( int i = 0; i < map.size(); i++ ) l.add(null);
|
||||||
|
for ( final Map.Entry<T, Integer> elt : map.entrySet() )
|
||||||
|
l.set(elt.getValue(), elt.getKey());
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSample(final int offset) { return samples.get(offset); }
|
||||||
|
public String getString(final int offset) { return strings.get(offset); }
|
||||||
|
public Allele getAllele(final int offset) { return alleles.get(offset); }
|
||||||
|
public List<Allele> getAlleles(final int[] offsets) {
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>(offsets.length);
|
||||||
|
for ( int i : offsets ) alleles.add(getAllele(i));
|
||||||
|
return alleles;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<String> getFilters(final int offset) {
|
||||||
|
Set<String> cached = filters.get(offset);
|
||||||
|
|
||||||
|
if ( cached != null )
|
||||||
|
return cached;
|
||||||
|
else {
|
||||||
|
final String filterString = getString(offset);
|
||||||
|
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||||
|
return null; // UNFILTERED records are represented by null
|
||||||
|
else {
|
||||||
|
Set<String> set = VCFCodec.parseFilters(null, -1, filterString);
|
||||||
|
filters.set(offset, set); // remember the result
|
||||||
|
return set;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.gcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [Short one sentence description of this walker]
|
||||||
|
* <p/>
|
||||||
|
* <p>
|
||||||
|
* [Functionality of this walker]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* [Input description]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* [Output description]
|
||||||
|
* </p>
|
||||||
|
* <p/>
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java
|
||||||
|
* -jar GenomeAnalysisTK.jar
|
||||||
|
* -T $WalkerName
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class GCFHeaderBuilder {
|
||||||
|
Map<Allele, Integer> alleles = new HashMap<Allele, Integer>();
|
||||||
|
Map<String, Integer> strings = new HashMap<String, Integer>();
|
||||||
|
Map<String, Integer> samples = new HashMap<String, Integer>();
|
||||||
|
|
||||||
|
public GCFHeader createHeader() {
|
||||||
|
return new GCFHeader(alleles, strings, samples);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int encodeString(final String chr) { return encode(strings, chr); }
|
||||||
|
public int encodeAllele(final Allele allele) { return encode(alleles, allele); }
|
||||||
|
public int encodeSample(final String sampleName) { return encode(samples, sampleName); }
|
||||||
|
|
||||||
|
private <T> int encode(Map<T, Integer> map, T key) {
|
||||||
|
Integer v = map.get(key);
|
||||||
|
if ( v == null ) {
|
||||||
|
v = map.size();
|
||||||
|
map.put(key, v);
|
||||||
|
}
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,123 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.gcf;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.IndexingVCFWriter;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* GCFWriter implementing the VCFWriter interface
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class GCFWriter extends IndexingVCFWriter {
|
||||||
|
final boolean skipGenotypes;
|
||||||
|
final FileOutputStream fileOutputStream;
|
||||||
|
final DataOutputStream dataOutputStream;
|
||||||
|
final GCFHeaderBuilder gcfHeaderBuilder;
|
||||||
|
int nbytes = 0;
|
||||||
|
VCFHeader header = null;
|
||||||
|
File location;
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Constructors
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
public GCFWriter(final File location, final SAMSequenceDictionary refDict, boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes) {
|
||||||
|
super(writerName(location, null), location, null, refDict, enableOnTheFlyIndexing);
|
||||||
|
this.location = location;
|
||||||
|
this.skipGenotypes = doNotWriteGenotypes;
|
||||||
|
|
||||||
|
// write the output
|
||||||
|
try {
|
||||||
|
fileOutputStream = new FileOutputStream(location);
|
||||||
|
dataOutputStream = createDataOutputStream(fileOutputStream);
|
||||||
|
gcfHeaderBuilder = new GCFHeaderBuilder();
|
||||||
|
} catch ( FileNotFoundException e ) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(location, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// VCFWriter interface functions
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void writeHeader(VCFHeader header) {
|
||||||
|
this.header = header;
|
||||||
|
try {
|
||||||
|
nbytes += GCFHeader.writeHeader(dataOutputStream);
|
||||||
|
} catch ( IOException e ) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(getStreamName(), "Couldn't write header", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void add(VariantContext vc) {
|
||||||
|
super.add(vc);
|
||||||
|
GCF gcf = new GCF(gcfHeaderBuilder, vc, skipGenotypes);
|
||||||
|
try {
|
||||||
|
nbytes += gcf.write(dataOutputStream);
|
||||||
|
} catch ( IOException e ) {
|
||||||
|
throw new UserException.CouldNotCreateOutputFile(getStreamName(), "Failed to add gcf record " + gcf + " to stream " + getStreamName(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
// todo -- write out VCF header lines
|
||||||
|
GCFHeader gcfHeader = gcfHeaderBuilder.createHeader();
|
||||||
|
try {
|
||||||
|
long headerPosition = nbytes;
|
||||||
|
nbytes += gcfHeader.writeFooter(dataOutputStream);
|
||||||
|
dataOutputStream.close();
|
||||||
|
//System.out.println("Writing forward reference to " + headerPosition);
|
||||||
|
|
||||||
|
RandomAccessFile raFile = new RandomAccessFile(location, "rw");
|
||||||
|
raFile.seek(GCFHeader.HEADER_FORWARD_REFERENCE_OFFSET);
|
||||||
|
raFile.writeLong(headerPosition);
|
||||||
|
raFile.close();
|
||||||
|
} catch ( IOException e ) {
|
||||||
|
throw new ReviewedStingException("Failed to close GCFWriter " + getStreamName(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
super.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final DataOutputStream createDataOutputStream(final OutputStream stream) {
|
||||||
|
return new DataOutputStream(new BufferedOutputStream(stream, GCF.BUFFER_SIZE));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -118,31 +118,40 @@ public class ReadUtils {
|
||||||
/**
|
/**
|
||||||
* This enum represents all the different ways in which a read can overlap an interval.
|
* This enum represents all the different ways in which a read can overlap an interval.
|
||||||
*
|
*
|
||||||
* NO_OVERLAP:
|
* NO_OVERLAP_CONTIG:
|
||||||
|
* read and interval are in different contigs.
|
||||||
|
*
|
||||||
|
* NO_OVERLAP_LEFT:
|
||||||
* the read does not overlap the interval.
|
* the read does not overlap the interval.
|
||||||
*
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
*
|
*
|
||||||
* LEFT_OVERLAP:
|
* NO_OVERLAP_RIGHT:
|
||||||
|
* the read does not overlap the interval.
|
||||||
|
*
|
||||||
|
* |----------------| (interval)
|
||||||
|
* <----------------> (read)
|
||||||
|
*
|
||||||
|
* OVERLAP_LEFT:
|
||||||
* the read starts before the beginning of the interval but ends inside of it
|
* the read starts before the beginning of the interval but ends inside of it
|
||||||
*
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
*
|
*
|
||||||
* RIGHT_OVERLAP:
|
* OVERLAP_RIGHT:
|
||||||
* the read starts inside the interval but ends outside of it
|
* the read starts inside the interval but ends outside of it
|
||||||
*
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
* <----------------> (read)
|
* <----------------> (read)
|
||||||
*
|
*
|
||||||
* FULL_OVERLAP:
|
* OVERLAP_LEFT_AND_RIGHT:
|
||||||
* the read starts before the interval and ends after the interval
|
* the read starts before the interval and ends after the interval
|
||||||
*
|
*
|
||||||
* |-----------| (interval)
|
* |-----------| (interval)
|
||||||
* <-------------------> (read)
|
* <-------------------> (read)
|
||||||
*
|
*
|
||||||
* CONTAINED:
|
* OVERLAP_CONTAINED:
|
||||||
* the read starts and ends inside the interval
|
* the read starts and ends inside the interval
|
||||||
*
|
*
|
||||||
* |----------------| (interval)
|
* |----------------| (interval)
|
||||||
|
|
@ -658,7 +667,7 @@ public class ReadUtils {
|
||||||
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
|
return ReadAndIntervalOverlap.OVERLAP_RIGHT;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
|
||||||
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
|
public static int getRefCoordSoftUnclippedStart(SAMRecord read) {
|
||||||
int start = read.getUnclippedStart();
|
int start = read.getUnclippedStart();
|
||||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
|
@ -670,9 +679,13 @@ public class ReadUtils {
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd()"})
|
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
|
||||||
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
|
public static int getRefCoordSoftUnclippedEnd(SAMRecord read) {
|
||||||
int stop = read.getUnclippedStart();
|
int stop = read.getUnclippedStart();
|
||||||
|
|
||||||
|
if (readIsEntirelyInsertion(read))
|
||||||
|
return stop;
|
||||||
|
|
||||||
int shift = 0;
|
int shift = 0;
|
||||||
CigarOperator lastOperator = null;
|
CigarOperator lastOperator = null;
|
||||||
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
|
@ -686,6 +699,14 @@ public class ReadUtils {
|
||||||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean readIsEntirelyInsertion(SAMRecord read) {
|
||||||
|
for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
|
||||||
|
if (cigarElement.getOperator() != CigarOperator.INSERTION)
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
|
* Looks for a read coordinate that corresponds to the reference coordinate in the soft clipped region before
|
||||||
* the alignment start of the read.
|
* the alignment start of the read.
|
||||||
|
|
|
||||||
|
|
@ -293,17 +293,8 @@ public class Genotype {
|
||||||
return commonInfo.getAttribute(key, defaultValue);
|
return commonInfo.getAttribute(key, defaultValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAttributeAsString(String key) { return commonInfo.getAttributeAsString(key); }
|
|
||||||
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
||||||
public int getAttributeAsInt(String key) { return commonInfo.getAttributeAsInt(key); }
|
|
||||||
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
||||||
public double getAttributeAsDouble(String key) { return commonInfo.getAttributeAsDouble(key); }
|
|
||||||
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||||
public boolean getAttributeAsBoolean(String key) { return commonInfo.getAttributeAsBoolean(key); }
|
|
||||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
||||||
|
|
||||||
public Integer getAttributeAsIntegerNoException(String key) { return commonInfo.getAttributeAsIntegerNoException(key); }
|
|
||||||
public Double getAttributeAsDoubleNoException(String key) { return commonInfo.getAttributeAsDoubleNoException(key); }
|
|
||||||
public String getAttributeAsStringNoException(String key) { return commonInfo.getAttributeAsStringNoException(key); }
|
|
||||||
public Boolean getAttributeAsBooleanNoException(String key) { return commonInfo.getAttributeAsBooleanNoException(key); }
|
|
||||||
}
|
}
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
package org.broadinstitute.sting.utils.variantcontext;
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -204,27 +206,40 @@ public final class InferredGeneticContext {
|
||||||
return defaultValue;
|
return defaultValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// public AttributedObject getAttributes(Collection<Object> keys) {
|
public String getAttributeAsString(String key, String defaultValue) {
|
||||||
// AttributedObject selected = new AttributedObject();
|
Object x = getAttribute(key);
|
||||||
//
|
if ( x == null ) return defaultValue;
|
||||||
// for ( Object key : keys )
|
if ( x instanceof String ) return (String)x;
|
||||||
// selected.putAttribute(key, this.getAttribute(key));
|
return String.valueOf(x); // throws an exception if this isn't a string
|
||||||
//
|
}
|
||||||
// return selected;
|
|
||||||
// }
|
public int getAttributeAsInt(String key, int defaultValue) {
|
||||||
|
Object x = getAttribute(key);
|
||||||
public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); } // **NOTE**: will turn a null Object into the String "null"
|
if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue;
|
||||||
public int getAttributeAsInt(String key) { Object x = getAttribute(key); return x instanceof Integer ? (Integer)x : Integer.valueOf((String)x); }
|
if ( x instanceof Integer ) return (Integer)x;
|
||||||
public double getAttributeAsDouble(String key) { Object x = getAttribute(key); return x instanceof Double ? (Double)x : Double.valueOf((String)x); }
|
return Integer.valueOf((String)x); // throws an exception if this isn't a string
|
||||||
public boolean getAttributeAsBoolean(String key) { Object x = getAttribute(key); return x instanceof Boolean ? (Boolean)x : Boolean.valueOf((String)x); }
|
}
|
||||||
|
|
||||||
public String getAttributeAsString(String key, String defaultValue) { return (String)getAttribute(key, defaultValue); }
|
public double getAttributeAsDouble(String key, double defaultValue) {
|
||||||
public int getAttributeAsInt(String key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); }
|
Object x = getAttribute(key);
|
||||||
public double getAttributeAsDouble(String key, double defaultValue) { return (Double)getAttribute(key, defaultValue); }
|
if ( x == null ) return defaultValue;
|
||||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue){ return (Boolean)getAttribute(key, defaultValue); }
|
if ( x instanceof Double ) return (Double)x;
|
||||||
|
return Double.valueOf((String)x); // throws an exception if this isn't a string
|
||||||
public Integer getAttributeAsIntegerNoException(String key) { try {return getAttributeAsInt(key);} catch (Exception e) {return null;} }
|
}
|
||||||
public Double getAttributeAsDoubleNoException(String key) { try {return getAttributeAsDouble(key);} catch (Exception e) {return null;} }
|
|
||||||
public String getAttributeAsStringNoException(String key) { if (getAttribute(key) == null) return null; return getAttributeAsString(key); }
|
public boolean getAttributeAsBoolean(String key, boolean defaultValue) {
|
||||||
public Boolean getAttributeAsBooleanNoException(String key) { try {return getAttributeAsBoolean(key);} catch (Exception e) {return null;} }
|
Object x = getAttribute(key);
|
||||||
|
if ( x == null ) return defaultValue;
|
||||||
|
if ( x instanceof Boolean ) return (Boolean)x;
|
||||||
|
return Boolean.valueOf((String)x); // throws an exception if this isn't a string
|
||||||
|
}
|
||||||
|
|
||||||
|
// public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); } // **NOTE**: will turn a null Object into the String "null"
|
||||||
|
// public int getAttributeAsInt(String key) { Object x = getAttribute(key); return x instanceof Integer ? (Integer)x : Integer.valueOf((String)x); }
|
||||||
|
// public double getAttributeAsDouble(String key) { Object x = getAttribute(key); return x instanceof Double ? (Double)x : Double.valueOf((String)x); }
|
||||||
|
// public boolean getAttributeAsBoolean(String key) { Object x = getAttribute(key); return x instanceof Boolean ? (Boolean)x : Boolean.valueOf((String)x); }
|
||||||
|
// public Integer getAttributeAsIntegerNoException(String key) { try {return getAttributeAsInt(key);} catch (Exception e) {return null;} }
|
||||||
|
// public Double getAttributeAsDoubleNoException(String key) { try {return getAttributeAsDouble(key);} catch (Exception e) {return null;} }
|
||||||
|
// public String getAttributeAsStringNoException(String key) { if (getAttribute(key) == null) return null; return getAttributeAsString(key); }
|
||||||
|
// public Boolean getAttributeAsBooleanNoException(String key) { try {return getAttributeAsBoolean(key);} catch (Exception e) {return null;} }
|
||||||
}
|
}
|
||||||
|
|
@ -666,21 +666,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
return commonInfo.getAttribute(key, defaultValue);
|
return commonInfo.getAttribute(key, defaultValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAttributeAsString(String key) { return commonInfo.getAttributeAsString(key); }
|
|
||||||
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
||||||
public int getAttributeAsInt(String key) { return commonInfo.getAttributeAsInt(key); }
|
|
||||||
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
||||||
public double getAttributeAsDouble(String key) { return commonInfo.getAttributeAsDouble(key); }
|
|
||||||
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||||
public boolean getAttributeAsBoolean(String key) { return commonInfo.getAttributeAsBoolean(key); }
|
|
||||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
||||||
|
|
||||||
public Integer getAttributeAsIntegerNoException(String key) { return commonInfo.getAttributeAsIntegerNoException(key); }
|
|
||||||
public Double getAttributeAsDoubleNoException(String key) { return commonInfo.getAttributeAsDoubleNoException(key); }
|
|
||||||
public String getAttributeAsStringNoException(String key) { return commonInfo.getAttributeAsStringNoException(key); }
|
|
||||||
public Boolean getAttributeAsBooleanNoException(String key) { return commonInfo.getAttributeAsBooleanNoException(key); }
|
|
||||||
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Working with alleles
|
// Working with alleles
|
||||||
|
|
|
||||||
|
|
@ -316,19 +316,22 @@ public class VariantContextUtils {
|
||||||
return pruneVariantContext(vc, null);
|
return pruneVariantContext(vc, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext pruneVariantContext(VariantContext vc, Collection<String> keysToPreserve ) {
|
public static VariantContext pruneVariantContext(final VariantContext vc, final Collection<String> keysToPreserve ) {
|
||||||
MutableVariantContext mvc = new MutableVariantContext(vc);
|
final MutableVariantContext mvc = new MutableVariantContext(vc);
|
||||||
|
|
||||||
if ( keysToPreserve == null || keysToPreserve.size() == 0 )
|
if ( keysToPreserve == null || keysToPreserve.size() == 0 )
|
||||||
mvc.clearAttributes();
|
mvc.clearAttributes();
|
||||||
else {
|
else {
|
||||||
Map<String, Object> d = mvc.getAttributes();
|
final Map<String, Object> d = mvc.getAttributes();
|
||||||
mvc.clearAttributes();
|
mvc.clearAttributes();
|
||||||
for ( String key : keysToPreserve )
|
for ( String key : keysToPreserve )
|
||||||
if ( d.containsKey(key) )
|
if ( d.containsKey(key) )
|
||||||
mvc.putAttribute(key, d.get(key));
|
mvc.putAttribute(key, d.get(key));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// this must be done as the ID is stored in the attributes field
|
||||||
|
if ( vc.hasID() ) mvc.setID(vc.getID());
|
||||||
|
|
||||||
Collection<Genotype> gs = mvc.getGenotypes().values();
|
Collection<Genotype> gs = mvc.getGenotypes().values();
|
||||||
mvc.clearGenotypes();
|
mvc.clearGenotypes();
|
||||||
for ( Genotype g : gs ) {
|
for ( Genotype g : gs ) {
|
||||||
|
|
@ -443,34 +446,6 @@ public class VariantContextUtils {
|
||||||
throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next()));
|
throw new ReviewedStingException(String.format("Couldn't find master VCF %s at %s", masterName, unsortedVCs.iterator().next()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, byte refBase) {
|
|
||||||
return simpleMerge(genomeLocParser, unsortedVCs, null, FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, GenotypeMergeType.UNSORTED, false, false, refBase);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
|
||||||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
|
||||||
* the sample name
|
|
||||||
*
|
|
||||||
* @param genomeLocParser loc parser
|
|
||||||
* @param unsortedVCs collection of unsorted VCs
|
|
||||||
* @param priorityListOfVCs priority list detailing the order in which we should grab the VCs
|
|
||||||
* @param filteredRecordMergeType merge type for filtered records
|
|
||||||
* @param genotypeMergeOptions merge option for genotypes
|
|
||||||
* @param annotateOrigin should we annotate the set it came from?
|
|
||||||
* @param printMessages should we print messages?
|
|
||||||
* @param inputRefBase the ref base
|
|
||||||
* @return new VariantContext
|
|
||||||
*/
|
|
||||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
|
||||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
|
||||||
boolean annotateOrigin, boolean printMessages, byte inputRefBase ) {
|
|
||||||
|
|
||||||
return simpleMerge(genomeLocParser, unsortedVCs, priorityListOfVCs, filteredRecordMergeType, genotypeMergeOptions, annotateOrigin, printMessages, "set", false, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
* Merges VariantContexts into a single hybrid. Takes genotypes for common samples in priority order, if provided.
|
||||||
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
* If uniqifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
|
||||||
|
|
@ -486,12 +461,18 @@ public class VariantContextUtils {
|
||||||
* @param setKey the key name of the set
|
* @param setKey the key name of the set
|
||||||
* @param filteredAreUncalled are filtered records uncalled?
|
* @param filteredAreUncalled are filtered records uncalled?
|
||||||
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
* @param mergeInfoWithMaxAC should we merge in info from the VC with maximum allele count?
|
||||||
* @return new VariantContext
|
* @return new VariantContext representing the merge of unsortedVCs
|
||||||
*/
|
*/
|
||||||
public static VariantContext simpleMerge(GenomeLocParser genomeLocParser, Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
|
||||||
FilteredRecordMergeType filteredRecordMergeType, GenotypeMergeType genotypeMergeOptions,
|
final Collection<VariantContext> unsortedVCs,
|
||||||
boolean annotateOrigin, boolean printMessages, String setKey,
|
final List<String> priorityListOfVCs,
|
||||||
boolean filteredAreUncalled, boolean mergeInfoWithMaxAC ) {
|
final FilteredRecordMergeType filteredRecordMergeType,
|
||||||
|
final GenotypeMergeType genotypeMergeOptions,
|
||||||
|
final boolean annotateOrigin,
|
||||||
|
final boolean printMessages,
|
||||||
|
final String setKey,
|
||||||
|
final boolean filteredAreUncalled,
|
||||||
|
final boolean mergeInfoWithMaxAC ) {
|
||||||
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
|
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -514,26 +495,28 @@ public class VariantContextUtils {
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// establish the baseline info from the first VC
|
// establish the baseline info from the first VC
|
||||||
VariantContext first = VCs.get(0);
|
final VariantContext first = VCs.get(0);
|
||||||
String name = first.getSource();
|
final String name = first.getSource();
|
||||||
GenomeLoc loc = getLocation(genomeLocParser,first);
|
final Allele refAllele = determineReferenceAllele(VCs);
|
||||||
|
|
||||||
Set<Allele> alleles = new TreeSet<Allele>();
|
final Set<Allele> alleles = new TreeSet<Allele>();
|
||||||
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
final Set<String> filters = new TreeSet<String>();
|
||||||
double negLog10PError = -1;
|
final Map<String, Object> attributes = new TreeMap<String, Object>();
|
||||||
Set<String> filters = new TreeSet<String>();
|
final Set<String> inconsistentAttributes = new HashSet<String>();
|
||||||
Map<String, Object> attributes = new TreeMap<String, Object>();
|
final Set<String> variantSources = new HashSet<String>(); // contains the set of sources we found in our set of VCs that are variant
|
||||||
Set<String> inconsistentAttributes = new HashSet<String>();
|
final Set<String> rsIDs = new LinkedHashSet<String>(1); // most of the time there's one id
|
||||||
String rsID = null;
|
|
||||||
|
GenomeLoc loc = getLocation(genomeLocParser,first);
|
||||||
int depth = 0;
|
int depth = 0;
|
||||||
int maxAC = -1;
|
int maxAC = -1;
|
||||||
Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
|
final Map<String, Object> attributesWithMaxAC = new TreeMap<String, Object>();
|
||||||
|
double negLog10PError = -1;
|
||||||
VariantContext vcWithMaxAC = null;
|
VariantContext vcWithMaxAC = null;
|
||||||
|
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
||||||
|
|
||||||
// counting the number of filtered and variant VCs
|
// counting the number of filtered and variant VCs
|
||||||
int nFiltered = 0, nVariant = 0;
|
int nFiltered = 0;
|
||||||
|
|
||||||
Allele refAllele = determineReferenceAllele(VCs);
|
|
||||||
boolean remapped = false;
|
boolean remapped = false;
|
||||||
|
|
||||||
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
||||||
|
|
@ -546,7 +529,7 @@ public class VariantContextUtils {
|
||||||
loc = getLocation(genomeLocParser,vc); // get the longest location
|
loc = getLocation(genomeLocParser,vc); // get the longest location
|
||||||
|
|
||||||
nFiltered += vc.isFiltered() ? 1 : 0;
|
nFiltered += vc.isFiltered() ? 1 : 0;
|
||||||
nVariant += vc.isVariant() ? 1 : 0;
|
if ( vc.isVariant() ) variantSources.add(vc.getSource());
|
||||||
|
|
||||||
AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
|
AlleleMapper alleleMapping = resolveIncompatibleAlleles(refAllele, vc, alleles);
|
||||||
remapped = remapped || alleleMapping.needsRemapping();
|
remapped = remapped || alleleMapping.needsRemapping();
|
||||||
|
|
@ -565,11 +548,10 @@ public class VariantContextUtils {
|
||||||
// special case DP (add it up) and ID (just preserve it)
|
// special case DP (add it up) and ID (just preserve it)
|
||||||
//
|
//
|
||||||
if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
|
if (vc.hasAttribute(VCFConstants.DEPTH_KEY))
|
||||||
depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY));
|
depth += vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, 0);
|
||||||
if (rsID == null && vc.hasID())
|
if ( vc.hasID() && ! vc.getID().equals(VCFConstants.EMPTY_ID_FIELD) ) rsIDs.add(vc.getID());
|
||||||
rsID = vc.getID();
|
|
||||||
if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
|
if (mergeInfoWithMaxAC && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY)) {
|
||||||
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY);
|
String rawAlleleCounts = vc.getAttributeAsString(VCFConstants.ALLELE_COUNT_KEY, null);
|
||||||
// lets see if the string contains a , separator
|
// lets see if the string contains a , separator
|
||||||
if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
|
if (rawAlleleCounts.contains(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR)) {
|
||||||
List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
|
List<String> alleleCountArray = Arrays.asList(rawAlleleCounts.substring(1, rawAlleleCounts.length() - 1).split(VCFConstants.INFO_FIELD_ARRAY_SEPARATOR));
|
||||||
|
|
@ -627,17 +609,16 @@ public class VariantContextUtils {
|
||||||
if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
|
if ( filteredRecordMergeType == FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED && nFiltered != VCs.size() )
|
||||||
filters.clear();
|
filters.clear();
|
||||||
|
|
||||||
// we care about where the call came from
|
if ( annotateOrigin ) { // we care about where the call came from
|
||||||
if ( annotateOrigin ) {
|
|
||||||
String setValue;
|
String setValue;
|
||||||
if ( nFiltered == 0 && nVariant == priorityListOfVCs.size() ) // nothing was unfiltered
|
if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
|
||||||
setValue = "Intersection";
|
setValue = "Intersection";
|
||||||
else if ( nFiltered == VCs.size() ) // everything was filtered out
|
else if ( nFiltered == VCs.size() ) // everything was filtered out
|
||||||
setValue = "FilteredInAll";
|
setValue = "FilteredInAll";
|
||||||
else if ( nVariant == 0 ) // everyone was reference
|
else if ( variantSources.isEmpty() ) // everyone was reference
|
||||||
setValue = "ReferenceInAll";
|
setValue = "ReferenceInAll";
|
||||||
else { // we are filtered in some subset
|
else {
|
||||||
List<String> s = new ArrayList<String>();
|
LinkedHashSet<String> s = new LinkedHashSet<String>();
|
||||||
for ( VariantContext vc : VCs )
|
for ( VariantContext vc : VCs )
|
||||||
if ( vc.isVariant() )
|
if ( vc.isVariant() )
|
||||||
s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() );
|
s.add( vc.isFiltered() ? "filterIn" + vc.getSource() : vc.getSource() );
|
||||||
|
|
@ -652,8 +633,10 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
if ( depth > 0 )
|
if ( depth > 0 )
|
||||||
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
|
attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth));
|
||||||
if ( rsID != null )
|
|
||||||
attributes.put(VariantContext.ID_KEY, rsID);
|
if ( ! rsIDs.isEmpty() ) {
|
||||||
|
attributes.put(VariantContext.ID_KEY, Utils.join(",", rsIDs));
|
||||||
|
}
|
||||||
|
|
||||||
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? attributesWithMaxAC : attributes) );
|
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, (mergeInfoWithMaxAC ? attributesWithMaxAC : attributes) );
|
||||||
// Trim the padded bases of all alleles if necessary
|
// Trim the padded bases of all alleles if necessary
|
||||||
|
|
@ -1159,9 +1142,7 @@ public class VariantContextUtils {
|
||||||
for (String orAttrib : MERGE_OR_ATTRIBS) {
|
for (String orAttrib : MERGE_OR_ATTRIBS) {
|
||||||
boolean attribVal = false;
|
boolean attribVal = false;
|
||||||
for (VariantContext vc : vcList) {
|
for (VariantContext vc : vcList) {
|
||||||
Boolean val = vc.getAttributeAsBooleanNoException(orAttrib);
|
attribVal = vc.getAttributeAsBoolean(orAttrib, false);
|
||||||
if (val != null)
|
|
||||||
attribVal = (attribVal || val);
|
|
||||||
if (attribVal) // already true, so no reason to continue:
|
if (attribVal) // already true, so no reason to continue:
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -1171,7 +1152,7 @@ public class VariantContextUtils {
|
||||||
// Merge ID fields:
|
// Merge ID fields:
|
||||||
String iDVal = null;
|
String iDVal = null;
|
||||||
for (VariantContext vc : vcList) {
|
for (VariantContext vc : vcList) {
|
||||||
String val = vc.getAttributeAsStringNoException(VariantContext.ID_KEY);
|
String val = vc.getAttributeAsString(VariantContext.ID_KEY, null);
|
||||||
if (val != null && !val.equals(VCFConstants.EMPTY_ID_FIELD)) {
|
if (val != null && !val.equals(VCFConstants.EMPTY_ID_FIELD)) {
|
||||||
if (iDVal == null)
|
if (iDVal == null)
|
||||||
iDVal = val;
|
iDVal = val;
|
||||||
|
|
@ -1251,8 +1232,10 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
public PhaseAndQuality(Genotype gt) {
|
public PhaseAndQuality(Genotype gt) {
|
||||||
this.isPhased = gt.isPhased();
|
this.isPhased = gt.isPhased();
|
||||||
if (this.isPhased)
|
if (this.isPhased) {
|
||||||
this.PQ = gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
|
this.PQ = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
||||||
|
if ( this.PQ == -1 ) this.PQ = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -75,7 +75,7 @@ public class WalkerTest extends BaseTest {
|
||||||
Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec());
|
Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec());
|
||||||
Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath());
|
Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath());
|
||||||
|
|
||||||
if ( ! indexFromOutputFile.equalsIgnoreTimestamp(dynamicIndex) ) {
|
if ( ! indexFromOutputFile.equalsIgnoreProperties(dynamicIndex) ) {
|
||||||
Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n",
|
Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed. FileIndex %s vs. on-the-fly %s%n",
|
||||||
indexFromOutputFile.getProperties(),
|
indexFromOutputFile.getProperties(),
|
||||||
dynamicIndex.getProperties()));
|
dynamicIndex.getProperties()));
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,7 @@ public class FeatureManagerUnitTest extends BaseTest {
|
||||||
private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
|
private static final File VCF3_FILE = new File(validationDataLocation + "vcfexample3.vcf");
|
||||||
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
|
private static final File VCF4_FILE = new File(testDir + "HiSeq.10000.vcf");
|
||||||
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
|
private static final File VCF4_FILE_GZ = new File(testDir + "HiSeq.10000.vcf.gz");
|
||||||
|
private static final File VCF4_FILE_BGZIP = new File(testDir + "HiSeq.10000.bgzip.vcf.gz");
|
||||||
|
|
||||||
private FeatureManager manager;
|
private FeatureManager manager;
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
|
@ -109,6 +110,7 @@ public class FeatureManagerUnitTest extends BaseTest {
|
||||||
new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
|
new FMTest(VariantContext.class, VCF3Codec.class, "VCF3", VCF3_FILE);
|
||||||
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
|
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE);
|
||||||
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ);
|
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_GZ);
|
||||||
|
new FMTest(VariantContext.class, VCFCodec.class, "VCF", VCF4_FILE_BGZIP);
|
||||||
new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null);
|
new FMTest(TableFeature.class, BedTableCodec.class, "bedtable", null);
|
||||||
return FMTest.getTests(FMTest.class);
|
return FMTest.getTests(FMTest.class);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,6 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broad.tribble.Tribble;
|
import org.broad.tribble.Tribble;
|
||||||
import org.broad.tribble.index.Index;
|
import org.broad.tribble.index.Index;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -45,7 +44,6 @@ import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -164,7 +162,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
||||||
try {
|
try {
|
||||||
Index idx = builder.loadIndex(vcfFile, new VCFCodec());
|
Index idx = builder.loadIndex(vcfFile, new VCFCodec());
|
||||||
// catch any exception; this call should pass correctly
|
// catch any exception; this call should pass correctly
|
||||||
SAMSequenceDictionary dict = RMDTrackBuilder.getSequenceDictionaryFromProperties(idx);
|
SAMSequenceDictionary dict = IndexDictionaryUtils.getSequenceDictionaryFromProperties(idx);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
Assert.fail("IO exception unexpected" + e.getMessage());
|
Assert.fail("IO exception unexpected" + e.getMessage());
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
|
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
|
||||||
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
|
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9")
|
Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e")
|
||||||
);
|
);
|
||||||
executeTest("Testing SnpEff annotations", spec);
|
executeTest("Testing SnpEff annotations", spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -96,8 +96,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
|
|
||||||
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); }
|
@Test public void uniqueSNPs() { combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "89f55abea8f59e39d1effb908440548c"); }
|
||||||
|
|
||||||
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "4836086891f6cbdd40eebef3076d215a"); }
|
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "c6adeda751cb2a08690dd9202356629f"); }
|
||||||
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "6a34b5d743efda8b2f3b639f3a2f5de8"); }
|
@Test public void omniHM3Intersect() { combineSites(" -filteredRecordsMergeType KEEP_IF_ALL_UNFILTERED", "3a08fd5ee18993dfc8882156ccf5d2e9"); }
|
||||||
|
|
||||||
@Test public void threeWayWithRefs() {
|
@Test public void threeWayWithRefs() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
|
@ -131,4 +131,13 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
|
||||||
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
|
@Test public void complexTestMinimal() { combineComplexSites(" -minimalVCF", "df96cb3beb2dbb5e02f80abec7d3571e"); }
|
||||||
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); }
|
@Test public void complexTestSitesOnly() { combineComplexSites(" -sites_only", "f704caeaaaed6711943014b847fe381a"); }
|
||||||
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
|
@Test public void complexTestSitesOnlyMinimal() { combineComplexSites(" -sites_only -minimalVCF", "f704caeaaaed6711943014b847fe381a"); }
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void combineDBSNPDuplicateSites() {
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T CombineVariants -NO_HEADER -L 1:902000-903000 -o %s -R " + b37KGReference + " -V:v1 " + b37dbSNP132,
|
||||||
|
1,
|
||||||
|
Arrays.asList(""));
|
||||||
|
executeTest("combineDBSNPDuplicateSites:", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -172,6 +172,17 @@ public class BAQUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testBAQQualRange() {
|
||||||
|
BAQ baq = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters
|
||||||
|
final byte ref = (byte)'A';
|
||||||
|
final byte alt = (byte)'A';
|
||||||
|
|
||||||
|
for ( int i = 0; i <= SAMUtils.MAX_PHRED_SCORE; i++ )
|
||||||
|
Assert.assertTrue(baq.calcEpsilon( ref, alt, (byte)i) >= 0.0, "Failed to get baq epsilon range");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public void testBAQ(BAQTest test, boolean lookupWithFasta) {
|
public void testBAQ(BAQTest test, boolean lookupWithFasta) {
|
||||||
BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters
|
BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4, false); // matches current samtools parameters
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,53 +1,43 @@
|
||||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broad.tribble.Tribble;
|
import org.broad.tribble.Tribble;
|
||||||
import org.broad.tribble.index.*;
|
import org.broad.tribble.index.*;
|
||||||
import org.broad.tribble.iterators.CloseableTribbleIterator;
|
import org.broad.tribble.iterators.CloseableTribbleIterator;
|
||||||
import org.broad.tribble.source.BasicFeatureSource;
|
import org.broad.tribble.source.BasicFeatureSource;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
import org.testng.annotations.BeforeTest;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* tests out the various functions in the index factory class
|
* tests out the various functions in the index factory class
|
||||||
*/
|
*/
|
||||||
public class IndexFactoryUnitTest {
|
public class IndexFactoryUnitTest extends BaseTest {
|
||||||
|
|
||||||
File inputFile = new File("public/testdata/HiSeq.10000.vcf");
|
File inputFile = new File("public/testdata/HiSeq.10000.vcf");
|
||||||
File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf");
|
File outputFile = new File("public/testdata/onTheFlyOutputTest.vcf");
|
||||||
File outputFileIndex = Tribble.indexFile(outputFile);
|
File outputFileIndex = Tribble.indexFile(outputFile);
|
||||||
|
|
||||||
/**
|
private SAMSequenceDictionary dict;
|
||||||
* test out scoring the indexes
|
|
||||||
*/
|
|
||||||
@Test
|
|
||||||
public void testScoreIndexes() {
|
|
||||||
/*// make a list of indexes to score
|
|
||||||
Map<Class,IndexCreator> creators = new HashMap<Class,IndexCreator>();
|
|
||||||
// add a linear index with the default bin size
|
|
||||||
LinearIndexCreator linearNormal = new LinearIndexCreator();
|
|
||||||
linearNormal.initialize(inputFile, linearNormal.defaultBinSize());
|
|
||||||
creators.add(LInearIndexlinearNormal);
|
|
||||||
|
|
||||||
// create a tree index with a small index size
|
@BeforeTest
|
||||||
IntervalIndexCreator treeSmallBin = new IntervalIndexCreator();
|
public void setup() {
|
||||||
treeSmallBin.initialize(inputFile, Math.max(200,treeSmallBin.defaultBinSize()/10));
|
try {
|
||||||
creators.add(treeSmallBin);
|
dict = new CachingIndexedFastaSequenceFile(new File(b37KGReference)).getSequenceDictionary();
|
||||||
|
}
|
||||||
List<Index> indexes = new ArrayList<Index>();
|
catch(FileNotFoundException ex) {
|
||||||
for (IndexCreator creator : creators)
|
throw new UserException.CouldNotReadInputFile(b37KGReference,ex);
|
||||||
indexes.add(creator.finalizeIndex(0));
|
}
|
||||||
|
|
||||||
ArrayList<Double> scores = IndexFactory.scoreIndexes(0.5,indexes,100, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
|
||||||
System.err.println("scores are : ");
|
|
||||||
for (Double score : scores) {
|
|
||||||
System.err.println(score);
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
|
|
@ -65,7 +55,7 @@ public class IndexFactoryUnitTest {
|
||||||
BasicFeatureSource<VariantContext> source = new BasicFeatureSource<VariantContext>(inputFile.getAbsolutePath(), indexFromInputFile, new VCFCodec());
|
BasicFeatureSource<VariantContext> source = new BasicFeatureSource<VariantContext>(inputFile.getAbsolutePath(), indexFromInputFile, new VCFCodec());
|
||||||
|
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
VCFWriter writer = new StandardVCFWriter(outputFile);
|
VCFWriter writer = new StandardVCFWriter(outputFile, dict);
|
||||||
writer.writeHeader((VCFHeader)source.getHeader());
|
writer.writeHeader((VCFHeader)source.getHeader());
|
||||||
CloseableTribbleIterator<VariantContext> it = source.iterator();
|
CloseableTribbleIterator<VariantContext> it = source.iterator();
|
||||||
while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) {
|
while (it.hasNext() && (counter++ < maxRecords || maxRecords == -1) ) {
|
||||||
|
|
|
||||||
|
|
@ -38,12 +38,13 @@ public class VCFWriterUnitTest extends BaseTest {
|
||||||
private Set<String> additionalColumns = new HashSet<String>();
|
private Set<String> additionalColumns = new HashSet<String>();
|
||||||
private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
|
private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
private IndexedFastaSequenceFile seq;
|
||||||
|
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public void beforeTests() {
|
public void beforeTests() {
|
||||||
File referenceFile = new File(hg18Reference);
|
File referenceFile = new File(hg18Reference);
|
||||||
try {
|
try {
|
||||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
seq = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||||
genomeLocParser = new GenomeLocParser(seq);
|
genomeLocParser = new GenomeLocParser(seq);
|
||||||
}
|
}
|
||||||
catch(FileNotFoundException ex) {
|
catch(FileNotFoundException ex) {
|
||||||
|
|
@ -55,7 +56,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testBasicWriteAndRead() {
|
public void testBasicWriteAndRead() {
|
||||||
VCFHeader header = createFakeHeader(metaData,additionalColumns);
|
VCFHeader header = createFakeHeader(metaData,additionalColumns);
|
||||||
VCFWriter writer = new StandardVCFWriter(fakeVCFFile);
|
VCFWriter writer = new StandardVCFWriter(fakeVCFFile, seq.getSequenceDictionary());
|
||||||
writer.writeHeader(header);
|
writer.writeHeader(header);
|
||||||
writer.add(createVC(header));
|
writer.add(createVC(header));
|
||||||
writer.add(createVC(header));
|
writer.add(createVC(header));
|
||||||
|
|
|
||||||
|
|
@ -131,11 +131,11 @@ class GATKResourcesBundle extends QScript {
|
||||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf",
|
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf",
|
||||||
"hapmap_3.3", b37, true, true))
|
"hapmap_3.3", b37, true, true))
|
||||||
|
|
||||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf",
|
addResource(new Resource("/humgen/1kg/processing/official_release/phase1/ALL.wgs.VQSR_consensus_biallelic.20101123.indels.sites.vcf",
|
||||||
"1000G_indels_for_realignment", b37, true, false))
|
"1000G_biallelic.indels", b37, true, false))
|
||||||
|
|
||||||
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf",
|
addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf",
|
||||||
"indels_mills_devine", b37, true, true))
|
"Mills_Devine_2hit.indels", b37, true, true))
|
||||||
|
|
||||||
//
|
//
|
||||||
// example call set for wiki tutorial
|
// example call set for wiki tutorial
|
||||||
|
|
@ -300,9 +300,9 @@ class GATKResourcesBundle extends QScript {
|
||||||
bamFile = bamIn
|
bamFile = bamIn
|
||||||
}
|
}
|
||||||
|
|
||||||
class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRod with UNIVERSAL_GATK_ARGS {
|
class IndexVCF(@Input vcf: File, @Input ref: File) extends CountRODs with UNIVERSAL_GATK_ARGS {
|
||||||
//@Output val vcfIndex: File = swapExt(vcf.getParent, vcf, ".vcf", ".vcf.idx")
|
//@Output val vcfIndex: File = swapExt(vcf.getParent, vcf, ".vcf", ".vcf.idx")
|
||||||
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
|
this.rod :+= vcf
|
||||||
this.reference_sequence = ref
|
this.reference_sequence = ref
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -313,7 +313,7 @@ class GATKResourcesBundle extends QScript {
|
||||||
}
|
}
|
||||||
|
|
||||||
class MakeDBSNP129(@Input dbsnp: File, @Input ref: File, @Output dbsnp129: File) extends SelectVariants with UNIVERSAL_GATK_ARGS {
|
class MakeDBSNP129(@Input dbsnp: File, @Input ref: File, @Output dbsnp129: File) extends SelectVariants with UNIVERSAL_GATK_ARGS {
|
||||||
this.rodBind :+= RodBind("variant", "VCF", dbsnp)
|
this.variant = dbsnp
|
||||||
this.select ++= List("\"dbSNPBuildID <= 129\"")
|
this.select ++= List("\"dbSNPBuildID <= 129\"")
|
||||||
this.reference_sequence = ref
|
this.reference_sequence = ref
|
||||||
this.out = dbsnp129
|
this.out = dbsnp129
|
||||||
|
|
|
||||||
|
|
@ -98,40 +98,52 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
// BUGBUG: We no longer support b36/hg18 because several of the necessary files aren't available aligned to those references
|
// BUGBUG: We no longer support b36/hg18 because several of the necessary files aren't available aligned to those references
|
||||||
|
|
||||||
val targetDataSets: Map[String, Target] = Map(
|
val targetDataSets: Map[String, Target] = Map(
|
||||||
"HiSeq" -> new Target("NA12878.HiSeq", hg18, dbSNP_hg18_129, hapmap_hg18,
|
"NA12878_wgs_b37" -> new Target("NA12878.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"),
|
||||||
|
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"),
|
||||||
|
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
||||||
|
"NA12878_wgs_decoy" -> new Target("NA12878.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.NA12878.clean.dedup.recal.bam"),
|
||||||
|
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
|
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
||||||
|
"NA12878_wgs_hg18" -> new Target("NA12878.HiSeq.WGS.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
|
||||||
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask",
|
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.indels.10.mask",
|
||||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam"),
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.bam"),
|
||||||
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf"),
|
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/HiSeq.WGS.cleaned.ug.snpfiltered.indelfiltered.vcf"),
|
||||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg18.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
||||||
"HiSeq19" -> new Target("NA12878.HiSeq19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
"NA12878_wex_b37" -> new Target("NA12878.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.bam"),
|
new File("/seq/picard_aggregation/C339/NA12878/v3/NA12878.bam"),
|
||||||
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.HiSeq19.filtered.vcf"),
|
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.noChrY.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
|
||||||
"GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
"NA12878_wex_hg18" -> new Target("NA12878.HiSeq.WEx.hg18", hg18, dbSNP_hg18_129, hapmap_hg18,
|
||||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"),
|
|
||||||
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.GA2.hg19.filtered.vcf"),
|
|
||||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
|
||||||
"WEx" -> new Target("NA12878.WEx", hg18, dbSNP_hg18_129, hapmap_hg18,
|
|
||||||
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.indels.10.mask",
|
"/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.indels.10.mask",
|
||||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.WEx.cleaned.recal.bam"),
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.WEx.cleaned.recal.bam"),
|
||||||
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.vcf"),
|
new File("/home/radon01/depristo/work/oneOffProjects/1000GenomesProcessingPaper/wgs.v13/GA2.WEx.cleaned.ug.snpfiltered.indelfiltered.vcf"),
|
||||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
|
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
|
||||||
"WExTrio" -> new Target("CEUTrio.WEx", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
"NA12878_wex_decoy" -> new Target("NA12878.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.NA12878.clean.dedup.recal.bam"),
|
||||||
|
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
|
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 1),
|
||||||
|
"CEUTrio_wex_b37" -> new Target("CEUTrio.HiSeq.WEx.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.bwa.cleaned.recal.bam"),
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.bwa.cleaned.recal.bam"),
|
||||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"),
|
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"),
|
||||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
|
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
|
||||||
"WGSTrio" -> new Target("CEUTrio.WGS", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
"CEUTrio_wgs_b37" -> new Target("CEUTrio.HiSeq.WGS.b37", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"),
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.bwa.cleaned.recal.bam"),
|
||||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
|
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
|
||||||
"WExTrioDecoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
"CEUTrio_wex_decoy" -> new Target("CEUTrio.HiSeq.WEx.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"),
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WEx.b37_decoy.list"),
|
||||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
|
"/seq/references/HybSelOligos/whole_exome_agilent_1.1_refseq_plus_3_boosters/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list", 3.3, 98.0, !lowPass, exome, 3),
|
||||||
"WGSTrioDecoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
"CEUTrio_wgs_decoy" -> new Target("CEUTrio.HiSeq.WGS.b37_decoy", b37_decoy, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"),
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37_decoy.list"),
|
||||||
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
new File("/humgen/gsa-hpprojects/dev/carneiro/trio/analysis/snps/CEUTrio.WEx.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
|
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.3, 99.0, !lowPass, !exome, 3),
|
||||||
|
"GA2hg19" -> new Target("NA12878.GA2.hg19", hg19, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
|
new File("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.GA2.WGS.bwa.cleaned.hg19.bam"),
|
||||||
|
new File("/humgen/gsa-hpprojects/dev/carneiro/hiseq19/analysis/snps/NA12878.GA2.hg19.filtered.vcf"),
|
||||||
|
"/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals", 2.14, 99.0, !lowPass, !exome, 1),
|
||||||
"FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
|
"FIN" -> new Target("FIN", b37, dbSNP_b37, hapmap_b37, indelMask_b37,
|
||||||
new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"),
|
new File("/humgen/1kg/processing/pipeline_test_bams/FIN.79sample.Nov2010.chr20.bam"),
|
||||||
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
new File("/humgen/gsa-hpprojects/dev/data/AugChr20Calls_v4_3state/ALL.august.v4.chr20.filtered.vcf"), // ** THIS GOLD STANDARD NEEDS TO BE CORRECTED **
|
||||||
|
|
|
||||||
Binary file not shown.
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
||||||
<ivy-module version="1.0">
|
<ivy-module version="1.0">
|
||||||
<info organisation="org.broad" module="tribble" revision="24" status="integration" />
|
<info organisation="org.broad" module="tribble" revision="25" status="integration" />
|
||||||
</ivy-module>
|
</ivy-module>
|
||||||
Loading…
Reference in New Issue