VariantContext has now moved over to Tribble, and the VCF4 parser is now the only VCF parser in town. Other changes include:
- Tribble is included directly in the GATK repo; those who have access to commit to Tribble can now commit directly from the GATK directory in IntelliJ; command-line users can commit from inside the tribble directory. - The Hapmap ROD is now in Tribble; all mentions have been switched over. - VariantContext does not know about GenomeLoc; use VariantContextUtils.getLocation(VariantContext vc) to get a GenomeLoc. - VariantContext.getSNPSubstitutionType is now in VariantContextUtils. - This does not include the checked-in project files for IntelliJ; I'm still running into issues with changes to the iml files being marked as changes by SVN. I'll send out an email to GSAMembers with some more details. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3954 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
b21f90aee0
commit
72ae81c6de
49
build.xml
49
build.xml
|
|
@ -8,6 +8,9 @@
|
|||
<property name="java.classes" value="${build.dir}/java/classes" />
|
||||
<property name="resource.file" value="StingText.properties" />
|
||||
|
||||
<!-- where to find the tribble distro -->
|
||||
<property name="tribble.dir" value="tribble" />
|
||||
|
||||
<!-- where to find 'findbugs', which you must set if you plan to use 'ant findbugs' -->
|
||||
<property name="findbugs.home" value="./findbugs"/>
|
||||
|
||||
|
|
@ -108,7 +111,7 @@
|
|||
<mkdir dir="${java.classes}"/>
|
||||
</target>
|
||||
|
||||
<target name="java.compile" depends="init,resolve"
|
||||
<target name="java.compile" depends="tribble,init,resolve"
|
||||
description="compile the source">
|
||||
<!-- Compile the java code from ${src} into build -->
|
||||
<javac srcdir="${java.source.dir}" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="runtime.dependencies">
|
||||
|
|
@ -357,7 +360,8 @@
|
|||
<junit printsummary="yes" showoutput="yes" maxmemory="${test.maxmemory}" clonevm="yes" haltonfailure="${halt}" failureProperty="test.failure">
|
||||
<formatter type="brief" usefile="false" />
|
||||
<formatter type="xml"/>
|
||||
|
||||
<jvmarg value="-Xdebug"/>
|
||||
<jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=8091"/>
|
||||
<classpath>
|
||||
<pathelement location="${java.classes}"/>
|
||||
<path refid="runtime.dependencies"/>
|
||||
|
|
@ -378,26 +382,54 @@
|
|||
|
||||
|
||||
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
|
||||
<target name="test" depends="java.test.compile" description="Run unit tests">
|
||||
<target name="test" depends="java.test.compile,tribble.test" description="Run unit tests">
|
||||
<condition property="ttype" value="*UnitTest" else="${single}">
|
||||
<not><isset property="single"/></not>
|
||||
</condition>
|
||||
<run-test testtype="${ttype}"/>
|
||||
</target>
|
||||
<target name="integrationtest" depends="java.test.compile" description="Run unit tests">
|
||||
<target name="integrationtest" depends="java.test.compile" description="Run integration tests">
|
||||
<condition property="itype" value="*IntegrationTest" else="${single}">
|
||||
<not><isset property="single"/></not>
|
||||
</condition>
|
||||
<run-test testtype="${itype}"/>
|
||||
</target>
|
||||
<target name="performancetest" depends="java.test.compile" description="Run unit tests">
|
||||
<target name="performancetest" depends="java.test.compile" description="Run performance tests">
|
||||
<condition property="ptype" value="*PerformanceTest" else="${single}">
|
||||
<not><isset property="single"/></not>
|
||||
</condition>
|
||||
<run-test testtype="${ptype}"/>
|
||||
</target>
|
||||
|
||||
|
||||
<!-- ***************************************************************************** -->
|
||||
<!-- *********** Tribble ********* -->
|
||||
<!-- ***************************************************************************** -->
|
||||
<!-- compile the library -->
|
||||
<target name="tribble.compile" description="compiles the tribble library">
|
||||
<echo message="Building the Tribble Library..."/>
|
||||
<ant antfile="build.xml" target="all" dir="${tribble.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
<!-- build the jar -->
|
||||
<target name="tribble" description="Builds the tribble jar" depends="tribble.compile">
|
||||
<copy todir="lib">
|
||||
<fileset dir="${tribble.dir}/dist" includes="*.jar"/>
|
||||
</copy>
|
||||
</target>
|
||||
|
||||
<!-- test tribble using the unit tests set in tribble -->
|
||||
<target name="tribble.test" description="runs the tribble tests" depends="tribble.compile">
|
||||
<echo message="Testing the Tribble Library..."/>
|
||||
<ant antfile="build.xml" target="test" dir="${tribble.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
<!-- clean tribble -->
|
||||
<target name="tribble.clean" description="cleans the tribble library">
|
||||
<echo message="Cleaning the Tribble Library..."/>
|
||||
<ant antfile="build.xml" target="clean" dir="${tribble.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
<!-- ***************************************************************************** -->
|
||||
|
||||
|
||||
<target name="javadoc" depends="init,resolve" description="generates javadoc">
|
||||
<mkdir dir="javadoc"/>
|
||||
|
|
@ -482,11 +514,14 @@
|
|||
|
||||
|
||||
|
||||
<target name="clean" description="clean up">
|
||||
<target name="clean" description="clean up" depends="tribble.clean">
|
||||
<delete dir="out"/>
|
||||
<delete dir="${build.dir}"/>
|
||||
<delete dir="lib"/>
|
||||
<delete dir="staging"/>
|
||||
<delete dir="${dist.dir}"/>
|
||||
<!-- When we clean, make sure to clean out tribble as well -->
|
||||
<ant antfile="tribble/build.xml" target="clean"/>
|
||||
|
||||
</target>
|
||||
</project>
|
||||
|
|
|
|||
1
ivy.xml
1
ivy.xml
|
|
@ -17,7 +17,6 @@
|
|||
<dependency org="jboss" name="javassist" rev="3.7.ga" conf="default"/>
|
||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4" conf="default"/>
|
||||
<dependency org="org.apache.bcel" name="bcel" rev="5.2" conf="default"/>
|
||||
<dependency org="org.broad" name="tribble" rev="latest.integration" conf="default"/>
|
||||
|
||||
<!-- Dependencies for reflections mvn repository -->
|
||||
<dependency org="org.reflections" name="reflections" rev="0.9.5-svnversion79M_mod2" conf="default"/>
|
||||
|
|
|
|||
|
|
@ -1,37 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broad.tribble.vcf;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: depristo
|
||||
* Date: Jun 29, 2010
|
||||
* Time: 3:48:47 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public interface NameAwareCodec {
|
||||
public String getName();
|
||||
public void setName(String name);
|
||||
}
|
||||
|
|
@ -1,119 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.exception.CodecLineParsingException;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: delangel
|
||||
*
|
||||
* The reader for VCF 3 files
|
||||
*/
|
||||
public class VCF3Codec implements FeatureCodec {
|
||||
|
||||
// we have to store the list of strings that make up the header until they're needed
|
||||
private List<String> headerStrings = new ArrayList<String>();
|
||||
private VCFHeader header = null;
|
||||
private VCFHeaderVersion version = VCFHeaderVersion.VCF3_3;
|
||||
|
||||
|
||||
// some classes need to transform the line before
|
||||
private LineTransform transformer = null;
|
||||
|
||||
/**
|
||||
* Fast path to get the location of the Feature for indexing
|
||||
* @param line the input line to decode
|
||||
* @return
|
||||
*/
|
||||
public Feature decodeLoc(String line) {
|
||||
return reallyDecode(line, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode a line as a Feature.
|
||||
*
|
||||
* @param line
|
||||
*
|
||||
* @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is
|
||||
* a comment)
|
||||
*/
|
||||
public Feature decode(String line) {
|
||||
return reallyDecode(line, false);
|
||||
}
|
||||
|
||||
private Feature reallyDecode(String line, boolean justLocationPlease ) {
|
||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||
if (line.startsWith("#")) return null;
|
||||
// transform the line, if we have a transform to do
|
||||
if (transformer != null) line = transformer.lineTransform(line);
|
||||
if (line.startsWith("#"))
|
||||
return null;
|
||||
|
||||
// make a VCFRecord of the line and return it
|
||||
VCFRecord rec = VCFReaderUtils.createRecord(line, header, justLocationPlease);
|
||||
if ( ! justLocationPlease ) rec.setHeader(header);
|
||||
return rec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the # of header lines for this file. We use this to parse out the header
|
||||
*
|
||||
* @return 0
|
||||
*/
|
||||
public Object readHeader(LineReader reader) {
|
||||
String line = "";
|
||||
try {
|
||||
while ((line = reader.readLine()) != null) {
|
||||
if (line.startsWith("##")) {
|
||||
headerStrings.add(line);
|
||||
}
|
||||
else if (line.startsWith("#")) {
|
||||
headerStrings.add(line);
|
||||
header = VCFReaderUtils.createHeader(headerStrings,version);
|
||||
return header;
|
||||
}
|
||||
else {
|
||||
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
|
||||
}
|
||||
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("IO Exception ", e);
|
||||
}
|
||||
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
|
||||
}
|
||||
|
||||
/**
|
||||
* @return VCFRecord.class
|
||||
*/
|
||||
public Class getFeatureType() {
|
||||
return VCFRecord.class;
|
||||
}
|
||||
|
||||
public static interface LineTransform {
|
||||
public String lineTransform(String line);
|
||||
}
|
||||
|
||||
public LineTransform getTransformer() {
|
||||
return transformer;
|
||||
}
|
||||
|
||||
public void setTransformer(LineTransform transformer) {
|
||||
this.transformer = transformer;
|
||||
}
|
||||
|
||||
public VCFHeaderVersion getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public void setVersion(VCFHeaderVersion version) {
|
||||
this.version = version;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class VCFCodec
|
||||
*
|
||||
* The codec for VCF, which relies on VCFReaderUtils to do most of the processing
|
||||
*/
|
||||
public class VCFCodec extends VCF4Codec {}
|
||||
|
|
@ -1,164 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broad.tribble.vcf;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* a base class for compound header lines, which include info lines and format lines (so far)
|
||||
*/
|
||||
public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
||||
public enum SupportedHeaderLineType {
|
||||
INFO(true), FORMAT(false);
|
||||
|
||||
public final boolean allowFlagValues;
|
||||
SupportedHeaderLineType(boolean flagValues) {
|
||||
allowFlagValues = flagValues;
|
||||
}
|
||||
}
|
||||
|
||||
// the field types
|
||||
private String name;
|
||||
private int count;
|
||||
private String description;
|
||||
private VCFHeaderLineType type;
|
||||
|
||||
// access methods
|
||||
public String getName() { return name; }
|
||||
public int getCount() { return count; }
|
||||
public String getDescription() { return description; }
|
||||
public VCFHeaderLineType getType() { return type; }
|
||||
|
||||
//
|
||||
public void setNumberToUnbounded() { this.count = UNBOUNDED; }
|
||||
|
||||
// our type of line, i.e. format, info, etc
|
||||
private final SupportedHeaderLineType lineType;
|
||||
|
||||
// line numerical values are allowed to be unbounded (or unknown), which is
|
||||
// marked with a dot (.)
|
||||
public static int UNBOUNDED = -1; // the value we store internally for unbounded types
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param count the count for this header line
|
||||
* @param type the type for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
this.name = name;
|
||||
this.count = count;
|
||||
this.type = type;
|
||||
this.description = description;
|
||||
this.lineType = lineType;
|
||||
validate();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF format header line
|
||||
*
|
||||
* @param line the header line
|
||||
* @param version the VCF header version
|
||||
*
|
||||
*/
|
||||
protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) {
|
||||
super(lineType.toString(), "");
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description"));
|
||||
name = mapping.get("ID");
|
||||
count = version == VCFHeaderVersion.VCF4_0 ?
|
||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v4) ? UNBOUNDED : Integer.valueOf(mapping.get("Number")) :
|
||||
mapping.get("Number").equals(VCFConstants.UNBOUNDED_ENCODING_v3) ? UNBOUNDED : Integer.valueOf(mapping.get("Number"));
|
||||
type = VCFHeaderLineType.valueOf(mapping.get("Type"));
|
||||
if (type == VCFHeaderLineType.Flag && !allowFlagValues())
|
||||
throw new IllegalArgumentException("Flag is an unsupported type for this kind of field");
|
||||
|
||||
description = mapping.get("Description");
|
||||
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
|
||||
description = UNBOUND_DESCRIPTION;
|
||||
|
||||
this.lineType = lineType;
|
||||
|
||||
validate();
|
||||
}
|
||||
|
||||
private void validate() {
|
||||
if ( name == null || type == null || description == null || lineType == null )
|
||||
throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s type=%s desc=%s lineType=%s",
|
||||
super.getKey(), name, type, description, lineType ));
|
||||
}
|
||||
|
||||
/**
|
||||
* make a string representation of this header line
|
||||
* @return a string representation
|
||||
*/
|
||||
protected String toStringEncoding() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Number", count == UNBOUNDED ? VCFConstants.UNBOUNDED_ENCODING_v4 : count);
|
||||
map.put("Type", type);
|
||||
map.put("Description", description);
|
||||
return lineType.toString() + "=" + VCFHeaderLine.toStringEncoding(map);
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if we're equal to another compounder header line
|
||||
* @param o a compound header line
|
||||
* @return true if equal
|
||||
*/
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFCompoundHeaderLine) )
|
||||
return false;
|
||||
VCFCompoundHeaderLine other = (VCFCompoundHeaderLine)o;
|
||||
return name.equals(other.name) &&
|
||||
count == other.count &&
|
||||
description.equals(other.description) &&
|
||||
type == other.type &&
|
||||
lineType == other.lineType;
|
||||
}
|
||||
|
||||
public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) {
|
||||
return count == other.count &&
|
||||
type == other.type &&
|
||||
lineType == other.lineType &&
|
||||
name.equals(other.name);
|
||||
}
|
||||
|
||||
public boolean sameLineTypeAndName(VCFCompoundHeaderLine other) {
|
||||
return lineType == other.lineType &&
|
||||
name.equals(other.name);
|
||||
}
|
||||
|
||||
/**
|
||||
* do we allow flag (boolean) values? (i.e. booleans where you don't have specify the value, AQ means AQ=true)
|
||||
* @return true if we do, false otherwise
|
||||
*/
|
||||
abstract boolean allowFlagValues();
|
||||
|
||||
}
|
||||
|
|
@ -1,84 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broad.tribble.vcf;
|
||||
|
||||
public final class VCFConstants {
|
||||
// standard INFO/FORMAT field keys
|
||||
public static final String ANCESTRAL_ALLELE_KEY = "AA";
|
||||
public static final String ALLELE_COUNT_KEY = "AC";
|
||||
public static final String ALLELE_FREQUENCY_KEY = "AF";
|
||||
public static final String ALLELE_NUMBER_KEY = "AN";
|
||||
public static final String RMS_BASE_QUALITY_KEY = "BQ";
|
||||
public static final String CIGAR_KEY = "CIGAR";
|
||||
public static final String DBSNP_KEY = "DB";
|
||||
public static final String DEPTH_KEY = "DP";
|
||||
public static final String END_KEY = "END";
|
||||
public static final String GENOTYPE_FILTER_KEY = "FT";
|
||||
public static final String GENOTYPE_KEY = "GT";
|
||||
public static final String GENOTYPE_LIKELIHOODS_KEY = "GL";
|
||||
public static final String GENOTYPE_QUALITY_KEY = "GQ";
|
||||
public static final String HAPMAP2_KEY = "H2";
|
||||
public static final String HAPMAP3_KEY = "H3";
|
||||
public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
|
||||
public static final String RMS_MAPPING_QUALITY_KEY = "MQ";
|
||||
public static final String MAPPING_QUALITY_ZERO_KEY = "MQ0";
|
||||
public static final String SAMPLE_NUMBER_KEY = "NS";
|
||||
public static final String OLD_DEPTH_KEY = "RD";
|
||||
public static final String STRAND_BIAS_KEY = "SB";
|
||||
public static final String SOMATIC_KEY = "SOMATIC";
|
||||
public static final String VALIDATED_KEY = "VALIDATED";
|
||||
|
||||
// separators
|
||||
public static final String FORMAT_FIELD_SEPARATOR = ":";
|
||||
public static final String GENOTYPE_FIELD_SEPARATOR = ":";
|
||||
public static final String FIELD_SEPARATOR = "\t";
|
||||
public static final String FILTER_CODE_SEPARATOR = ";";
|
||||
public static final String INFO_FIELD_SEPARATOR = ";";
|
||||
public static final String UNPHASED = "/";
|
||||
public static final String PHASED = "|";
|
||||
public static final String PHASED_SWITCH_PROB_v3 = "\\";
|
||||
|
||||
// missing/default values
|
||||
public static final String UNFILTERED = ".";
|
||||
public static final String PASSES_FILTERS_v3 = "0";
|
||||
public static final String PASSES_FILTERS_v4 = "PASS";
|
||||
public static final String EMPTY_ID_FIELD = ".";
|
||||
public static final String EMPTY_INFO_FIELD = ".";
|
||||
public static final String EMPTY_ALTERNATE_ALLELE_FIELD = ".";
|
||||
public static final String MISSING_VALUE_v4 = ".";
|
||||
public static final String MISSING_QUALITY_v3 = "-1";
|
||||
public static final String MISSING_GENOTYPE_QUALITY_v3 = "-1";
|
||||
public static final String MISSING_HAPLOTYPE_QUALITY_v3 = "-1";
|
||||
public static final String MISSING_DEPTH_v3 = "-1";
|
||||
public static final String UNBOUNDED_ENCODING_v4 = ".";
|
||||
public static final String UNBOUNDED_ENCODING_v3 = "-1";
|
||||
public static final String EMPTY_ALLELE = ".";
|
||||
public static final String EMPTY_GENOTYPE = "./.";
|
||||
public static final double MAX_GENOTYPE_QUAL = 99.0;
|
||||
|
||||
public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f";
|
||||
public static final String DOUBLE_PRECISION_INT_SUFFIX = ".00";
|
||||
}
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* A class representing a key=value entry for FILTER fields in the VCF header
|
||||
*/
|
||||
public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeaderLine {
|
||||
|
||||
private String name;
|
||||
private String description;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
*
|
||||
* @param name the name for this header line
|
||||
* @param description the description for this header line
|
||||
*/
|
||||
public VCFFilterHeaderLine(String name, String description) {
|
||||
super("FILTER", "");
|
||||
this.name = name;
|
||||
this.description = description;
|
||||
|
||||
if ( name == null || description == null )
|
||||
throw new IllegalArgumentException(String.format("Invalid VCFCompoundHeaderLine: key=%s name=%s desc=%s", super.getKey(), name, description ));
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF info header line
|
||||
*
|
||||
* @param line the header line
|
||||
* @param version the vcf header version
|
||||
*/
|
||||
protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super("FILTER", "");
|
||||
Map<String,String> mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description"));
|
||||
name = mapping.get("ID");
|
||||
description = mapping.get("Description");
|
||||
if ( description == null && ALLOW_UNBOUND_DESCRIPTIONS ) // handle the case where there's no description provided
|
||||
description = UNBOUND_DESCRIPTION;
|
||||
}
|
||||
|
||||
protected String toStringEncoding() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("ID", name);
|
||||
map.put("Description", description);
|
||||
return "FILTER=" + VCFHeaderLine.toStringEncoding(map);
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFFilterHeaderLine) )
|
||||
return false;
|
||||
VCFFilterHeaderLine other = (VCFFilterHeaderLine)o;
|
||||
return name.equals(other.name) &&
|
||||
description.equals(other.description);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return description;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFFormatHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry for genotype FORMAT fields in the VCF header
|
||||
*/
|
||||
public class VCFFormatHeaderLine extends VCFCompoundHeaderLine {
|
||||
|
||||
public VCFFormatHeaderLine(String name, int count, VCFHeaderLineType type, String description) {
|
||||
super(name, count, type, description, SupportedHeaderLineType.FORMAT);
|
||||
if (type == VCFHeaderLineType.Flag)
|
||||
throw new IllegalArgumentException("Flag is an unsupported type for format fields");
|
||||
}
|
||||
|
||||
protected VCFFormatHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super(line, version, SupportedHeaderLineType.FORMAT);
|
||||
}
|
||||
|
||||
// format fields do not allow flag values (that wouldn't make much sense, how would you encode this in the genotype).
|
||||
@Override
|
||||
boolean allowFlagValues() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,136 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFGenotypeEncoding
|
||||
* <p/>
|
||||
* basic encoding class for genotype fields in VCF
|
||||
*/
|
||||
public class VCFGenotypeEncoding {
|
||||
public enum TYPE {
|
||||
SINGLE_BASE,
|
||||
INSERTION,
|
||||
DELETION,
|
||||
UNCALLED,
|
||||
MIXED // this type is only valid in aggregate, not for a single VCFGenotypeEncoding
|
||||
}
|
||||
|
||||
// our length (0 for SINGLE_BASE), our bases, and our type
|
||||
private final int mLength;
|
||||
private final String mBases;
|
||||
private final TYPE mType;
|
||||
|
||||
// public constructor, that parses out the base string
|
||||
public VCFGenotypeEncoding(String baseString){
|
||||
this(baseString, false);
|
||||
}
|
||||
public VCFGenotypeEncoding(String baseString, boolean allowMultipleBaseReference) {
|
||||
if ((baseString.length() == 1)) {
|
||||
// are we an empty (no-call) genotype?
|
||||
if (baseString.equals(VCFConstants.EMPTY_ALLELE)) {
|
||||
mBases = VCFConstants.EMPTY_ALLELE;
|
||||
mLength = 0;
|
||||
mType = TYPE.UNCALLED;
|
||||
} else if (!validBases(baseString)) {
|
||||
throw new IllegalArgumentException("Alleles of length 1 must be one of A,C,G,T, " + baseString + " was passed in");
|
||||
} else { // we're a valid base
|
||||
mBases = baseString.toUpperCase();
|
||||
mLength = 0;
|
||||
mType = TYPE.SINGLE_BASE;
|
||||
}
|
||||
} else { // deletion or insertion
|
||||
if (baseString.length() < 1 ||(!allowMultipleBaseReference && (baseString.toUpperCase().charAt(0) != 'D' && baseString.toUpperCase().charAt(0) != 'I'))) {
|
||||
throw new IllegalArgumentException("Genotype encoding of " + baseString + " was passed in, but is not a valid deletion, insertion, base, or no call (.)");
|
||||
}
|
||||
if (baseString.toUpperCase().charAt(0) == 'D') {
|
||||
mLength = Integer.valueOf(baseString.substring(1, baseString.length()));
|
||||
mBases = "";
|
||||
mType = TYPE.DELETION;
|
||||
} else if (baseString.toUpperCase().charAt(0) == 'I') { // we're an I
|
||||
mBases = baseString.substring(1, baseString.length()).toUpperCase();
|
||||
if (!validBases(mBases))
|
||||
throw new IllegalArgumentException("The insertion base string contained invalid bases -> " + baseString);
|
||||
mLength = mBases.length();
|
||||
mType = TYPE.INSERTION;
|
||||
} else{
|
||||
mBases = baseString;
|
||||
mType = TYPE.MIXED;
|
||||
mLength = mBases.length();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int getLength() {
|
||||
return mLength;
|
||||
}
|
||||
|
||||
public String getBases() {
|
||||
return mBases;
|
||||
}
|
||||
|
||||
public TYPE getType() {
|
||||
return mType;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj) {
|
||||
if ( obj == null )
|
||||
return false;
|
||||
if ( obj instanceof VCFGenotypeEncoding ) {
|
||||
VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj;
|
||||
return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength);
|
||||
}
|
||||
if ( mType == TYPE.UNCALLED && obj.toString().equals(VCFConstants.EMPTY_ALLELE) )
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
// our underlying data is immutable, so this is safe (we won't strand a value in a hashtable somewhere
|
||||
// when the data changes underneath, altering this value).
|
||||
String str = this.mBases + String.valueOf(this.mLength) + this.mType.toString();
|
||||
return str.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* dump the string representation of this genotype encoding
|
||||
*
|
||||
* @return string representation
|
||||
*/
|
||||
public String toString() {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
switch (mType) {
|
||||
case SINGLE_BASE:
|
||||
case UNCALLED:
|
||||
case MIXED:
|
||||
builder.append(mBases);
|
||||
break;
|
||||
case INSERTION:
|
||||
builder.append("I");
|
||||
builder.append(mBases);
|
||||
break;
|
||||
case DELETION:
|
||||
builder.append("D");
|
||||
builder.append(mLength);
|
||||
break;
|
||||
}
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* ensure that string contains valid bases
|
||||
*
|
||||
* @param bases the bases to check
|
||||
*
|
||||
* @return true if they're all either A,C,G,T; false otherwise
|
||||
*/
|
||||
private static boolean validBases(String bases) {
|
||||
for (char c : bases.toUpperCase().toCharArray()) {
|
||||
if (c != 'A' && c != 'C' && c != 'G' && c != 'T' && c != 'N')
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,349 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class VCFGenotypeRecord
|
||||
*
|
||||
* the basics of a genotype call in VCF
|
||||
*/
|
||||
public class VCFGenotypeRecord {
|
||||
|
||||
public static final double MAX_QUAL_VALUE = 99.0;
|
||||
|
||||
// what kind of phasing this genotype has
|
||||
public enum PHASE {
|
||||
UNPHASED("/"), PHASED("|"), PHASED_SWITCH_PROB("\\"); // , UNKNOWN
|
||||
|
||||
String genotypeSeparator;
|
||||
PHASE(String sep) { this.genotypeSeparator = sep; }
|
||||
}
|
||||
|
||||
// our record
|
||||
private VCFRecord mRecord;
|
||||
|
||||
// our phasing
|
||||
private PHASE mPhaseType;
|
||||
|
||||
// our bases(s)
|
||||
private final List<VCFGenotypeEncoding> mGenotypeAlleles = new ArrayList<VCFGenotypeEncoding>();
|
||||
|
||||
// our mapping of the format mFields to values
|
||||
private final Map<String, String> mFields = new HashMap<String, String>();
|
||||
|
||||
// our sample name
|
||||
private String mSampleName;
|
||||
|
||||
/**
|
||||
* Create a VCF genotype record
|
||||
*
|
||||
* @param sampleName sample name
|
||||
* @param genotypes list of genotypes
|
||||
* @param phasing phasing
|
||||
*/
|
||||
public VCFGenotypeRecord(String sampleName, List<VCFGenotypeEncoding> genotypes, PHASE phasing) {
|
||||
mSampleName = sampleName;
|
||||
if (genotypes != null)
|
||||
this.mGenotypeAlleles.addAll(genotypes);
|
||||
mPhaseType = phasing;
|
||||
}
|
||||
|
||||
public void setVCFRecord(VCFRecord record) {
|
||||
mRecord = record;
|
||||
}
|
||||
|
||||
public void setSampleName(String name) {
|
||||
mSampleName = name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a field to the genotype record.
|
||||
* Throws an exception if the key is GT, as that's computed internally.
|
||||
*
|
||||
* @param key the field name (use static variables above for common fields)
|
||||
* @param value the field value
|
||||
*/
|
||||
public void setField(String key, String value) {
|
||||
// make sure the GT field isn't being set
|
||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) )
|
||||
throw new IllegalArgumentException("Setting the GT field is not allowed as that's done internally");
|
||||
mFields.put(key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* determine the phase of the genotype
|
||||
*
|
||||
* @param phase the string that contains the phase character
|
||||
*
|
||||
* @return the phase
|
||||
*/
|
||||
static PHASE determinePhase(String phase) {
|
||||
// find the phasing information
|
||||
for ( PHASE p : PHASE.values() ) {
|
||||
if (phase.equals(p.genotypeSeparator))
|
||||
return p;
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException("Unknown genotype phasing parameter: " + phase);
|
||||
}
|
||||
|
||||
|
||||
public PHASE getPhaseType() {
|
||||
return mPhaseType;
|
||||
}
|
||||
|
||||
public String getSampleName() {
|
||||
return mSampleName;
|
||||
}
|
||||
|
||||
public List<VCFGenotypeEncoding> getAlleles() {
|
||||
return mGenotypeAlleles;
|
||||
}
|
||||
|
||||
public Map<String, String> getFields() {
|
||||
return mFields;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the phred-scaled quality score
|
||||
*/
|
||||
public double getQual() {
|
||||
return ( mFields.containsKey(VCFConstants.GENOTYPE_QUALITY_KEY) ? Double.valueOf(mFields.get(VCFConstants.GENOTYPE_QUALITY_KEY)) : Double.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3));
|
||||
}
|
||||
|
||||
public boolean isMissingQual() {
|
||||
return VCFConstants.MISSING_GENOTYPE_QUALITY_v3.equals(String.valueOf((int)getQual()));
|
||||
}
|
||||
|
||||
public double getNegLog10PError() {
|
||||
return (isMissingQual() ? Double.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3) : getQual() / 10.0);
|
||||
}
|
||||
|
||||
public int getReadCount() {
|
||||
return ( mFields.containsKey(VCFConstants.DEPTH_KEY) ? Integer.valueOf(mFields.get(VCFConstants.DEPTH_KEY)) : Integer.valueOf(VCFConstants.MISSING_DEPTH_v3));
|
||||
}
|
||||
|
||||
public String getLocation() {
|
||||
return mRecord != null ? mRecord.getChr() + ":" + mRecord.getPosition() : null;
|
||||
}
|
||||
|
||||
public String getReference() {
|
||||
return mRecord != null ? mRecord.getReference() : "N";
|
||||
}
|
||||
|
||||
public String getBases() {
|
||||
String genotype = "";
|
||||
for ( VCFGenotypeEncoding encoding : mGenotypeAlleles )
|
||||
genotype += encoding.getBases();
|
||||
return genotype;
|
||||
}
|
||||
|
||||
public boolean isVariant(char ref) {
|
||||
for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
|
||||
if ( encoding.getType() == VCFGenotypeEncoding.TYPE.UNCALLED )
|
||||
continue;
|
||||
if ( encoding.getType() != VCFGenotypeEncoding.TYPE.SINGLE_BASE ||
|
||||
encoding.getBases().charAt(0) != ref )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isPointGenotype() {
|
||||
return (mRecord != null ? !mRecord.isIndel() : true);
|
||||
}
|
||||
|
||||
public boolean isHom() {
|
||||
if ( mGenotypeAlleles.size() == 0 )
|
||||
return true;
|
||||
|
||||
String bases = mGenotypeAlleles.get(0).getBases();
|
||||
for ( int i = 1; i < mGenotypeAlleles.size(); i++ ) {
|
||||
if ( !bases.equals(mGenotypeAlleles.get(1).getBases()) )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isHet() {
|
||||
return !isHom();
|
||||
}
|
||||
|
||||
public boolean isNoCall() {
|
||||
for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
|
||||
if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
return ( mFields.get(VCFConstants.GENOTYPE_FILTER_KEY) != null &&
|
||||
!mFields.get(VCFConstants.GENOTYPE_FILTER_KEY).equals(VCFConstants.UNFILTERED) &&
|
||||
!mFields.get(VCFConstants.GENOTYPE_FILTER_KEY).equals(VCFConstants.PASSES_FILTERS_v3));
|
||||
}
|
||||
|
||||
public int getPloidy() {
|
||||
return mGenotypeAlleles.size();
|
||||
}
|
||||
|
||||
public VCFRecord getRecord() {
|
||||
return mRecord;
|
||||
}
|
||||
|
||||
private String toGenotypeString(List<VCFGenotypeEncoding> altAlleles) {
|
||||
List<String> alleleStrings = new ArrayList<String>(altAlleles.size());
|
||||
for (VCFGenotypeEncoding allele : mGenotypeAlleles) {
|
||||
if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED)
|
||||
alleleStrings.add(VCFConstants.EMPTY_ALLELE);
|
||||
else
|
||||
alleleStrings.add(String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0));
|
||||
}
|
||||
|
||||
return Utils.join(mPhaseType.genotypeSeparator, alleleStrings);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("[VCFGenotype %s %s %s %s]", getLocation(), mSampleName, this.mGenotypeAlleles, mFields);
|
||||
}
|
||||
|
||||
public boolean isEmptyGenotype() {
|
||||
for ( VCFGenotypeEncoding encoding : mGenotypeAlleles ) {
|
||||
if ( encoding.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean equals(Object other) {
|
||||
if (other instanceof VCFGenotypeRecord) {
|
||||
if (((VCFGenotypeRecord) other).mPhaseType != this.mPhaseType) return false;
|
||||
if (!((VCFGenotypeRecord) other).mGenotypeAlleles.equals(this.mGenotypeAlleles)) return false;
|
||||
if (!((VCFGenotypeRecord) other).mFields.equals(mFields)) return false;
|
||||
if (!((VCFGenotypeRecord) other).mSampleName.equals(this.mSampleName)) return false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* output a string representation of the VCFGenotypeRecord, given the alternate alleles
|
||||
*
|
||||
* @param altAlleles the alternate alleles, needed for toGenotypeString()
|
||||
* @param genotypeFormatStrings genotype format strings
|
||||
*
|
||||
* @return a string
|
||||
*/
|
||||
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings) {
|
||||
return toStringEncoding(altAlleles, genotypeFormatStrings, false);
|
||||
}
|
||||
|
||||
public String toStringEncoding(List<VCFGenotypeEncoding> altAlleles, String[] genotypeFormatStrings, boolean doVCF40) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append(toGenotypeString(altAlleles));
|
||||
|
||||
for ( String field : genotypeFormatStrings ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) )
|
||||
continue;
|
||||
|
||||
String value = mFields.get(field);
|
||||
if ( value == null && field.equals(VCFConstants.OLD_DEPTH_KEY) )
|
||||
value = mFields.get(VCFConstants.DEPTH_KEY);
|
||||
|
||||
builder.append(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
|
||||
if ( value == null || value.equals("") )
|
||||
builder.append(getMissingFieldValue(field, doVCF40));
|
||||
else
|
||||
builder.append(value);
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* output a string representation of an empty genotype
|
||||
*
|
||||
* @param genotypeFormatStrings genotype format strings
|
||||
*
|
||||
* @return a string
|
||||
*/
|
||||
public static String stringEncodingForEmptyGenotype(String[] genotypeFormatStrings) {
|
||||
// backward compatibility to VCF 3.3
|
||||
return stringEncodingForEmptyGenotype(genotypeFormatStrings, false);
|
||||
}
|
||||
public static String stringEncodingForEmptyGenotype(String[] genotypeFormatStrings, boolean doVCF40) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append(VCFConstants.EMPTY_GENOTYPE);
|
||||
|
||||
for ( String field : genotypeFormatStrings ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) )
|
||||
continue;
|
||||
|
||||
// in VCF4.0, if a genotype is empty only the ./. key can be included
|
||||
if (!doVCF40) {
|
||||
builder.append(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
|
||||
builder.append(getMissingFieldValue(field));
|
||||
}
|
||||
}
|
||||
|
||||
return builder.toString();
|
||||
}
|
||||
|
||||
public static String getMissingFieldValue(String field) {
|
||||
// backward compatibility to VCF 3.3
|
||||
return getMissingFieldValue(field, false);
|
||||
}
|
||||
public static String getMissingFieldValue(String field, boolean doVCF40) {
|
||||
String result;
|
||||
if (doVCF40) {
|
||||
result = "."; // default missing value
|
||||
// TODO - take number of elements in field as input and output corresponding .'s
|
||||
if ( field.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) )
|
||||
result = ".,.,.";
|
||||
else if ( field.equals(VCFConstants.HAPLOTYPE_QUALITY_KEY) )
|
||||
result = ".,.";
|
||||
|
||||
}
|
||||
else {
|
||||
result = "";
|
||||
|
||||
|
||||
if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) )
|
||||
result = String.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3);
|
||||
else if ( field.equals(VCFConstants.DEPTH_KEY) || field.equals(VCFConstants.OLD_DEPTH_KEY) )
|
||||
result = String.valueOf(VCFConstants.MISSING_DEPTH_v3);
|
||||
else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) )
|
||||
result = VCFConstants.UNFILTERED;
|
||||
else if ( field.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) )
|
||||
result = "0,0,0";
|
||||
// TODO -- support haplotype quality
|
||||
//else if ( field.equals(HAPLOTYPE_QUALITY_KEY) )
|
||||
// result = String.valueOf(MISSING_HAPLOTYPE_QUALITY);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings(VCFHeaderVersion version) {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
//result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality"));
|
||||
return result;
|
||||
}
|
||||
|
||||
public void replaceFields(HashMap<String,String> newFields) {
|
||||
mFields.clear();
|
||||
for ( String s : newFields.keySet() ) {
|
||||
mFields.put(s,newFields.get(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,165 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class VCFHeader
|
||||
* <p/>
|
||||
* A class representing the VCF header
|
||||
*/
|
||||
public class VCFHeader {
|
||||
|
||||
// the mandatory header fields
|
||||
public enum HEADER_FIELDS {
|
||||
CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
|
||||
}
|
||||
|
||||
// the associated meta data
|
||||
private final Set<VCFHeaderLine> mMetaData;
|
||||
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
|
||||
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
|
||||
|
||||
// the list of auxillary tags
|
||||
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
|
||||
|
||||
// the character string that indicates meta data
|
||||
public static final String METADATA_INDICATOR = "##";
|
||||
|
||||
// the header string indicator
|
||||
public static final String HEADER_INDICATOR = "#";
|
||||
|
||||
/** do we have genotying data? */
|
||||
private boolean hasGenotypingData = false;
|
||||
|
||||
/**
|
||||
* create a VCF header, given a list of meta data and auxillary tags
|
||||
*
|
||||
* @param metaData the meta data associated with this header
|
||||
*/
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData) {
|
||||
mMetaData = new TreeSet<VCFHeaderLine>(metaData);
|
||||
loadVCFVersion();
|
||||
loadMetaDataMaps();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF header, given a list of meta data and auxillary tags
|
||||
*
|
||||
* @param metaData the meta data associated with this header
|
||||
* @param genotypeSampleNames the genotype format field, and the sample names
|
||||
*/
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
|
||||
mMetaData = new TreeSet<VCFHeaderLine>();
|
||||
if ( metaData != null )
|
||||
mMetaData.addAll(metaData);
|
||||
for (String col : genotypeSampleNames) {
|
||||
if (!col.equals("FORMAT"))
|
||||
mGenotypeSampleNames.add(col);
|
||||
}
|
||||
if (genotypeSampleNames.size() > 0) hasGenotypingData = true;
|
||||
loadVCFVersion();
|
||||
loadMetaDataMaps();
|
||||
}
|
||||
|
||||
/**
|
||||
* check our metadata for a VCF version tag, and throw an exception if the version is out of date
|
||||
* or the version is not present
|
||||
*/
|
||||
public void loadVCFVersion() {
|
||||
List<VCFHeaderLine> toRemove = new ArrayList<VCFHeaderLine>();
|
||||
for ( VCFHeaderLine line : mMetaData )
|
||||
if ( VCFHeaderVersion.isFormatString(line.getKey())) {
|
||||
toRemove.add(line);
|
||||
}
|
||||
// remove old header lines for now,
|
||||
mMetaData.removeAll(toRemove);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* load the format/info meta data maps (these are used for quick lookup by key name)
|
||||
*/
|
||||
private void loadMetaDataMaps() {
|
||||
for ( VCFHeaderLine line : mMetaData ) {
|
||||
if ( line instanceof VCFInfoHeaderLine ) {
|
||||
VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line;
|
||||
mInfoMetaData.put(infoLine.getName(), infoLine);
|
||||
}
|
||||
else if ( line instanceof VCFFormatHeaderLine ) {
|
||||
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
|
||||
mFormatMetaData.put(formatLine.getName(), formatLine);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* get the header fields in order they're presented in the input file (which is now required to be
|
||||
* the order presented in the spec).
|
||||
*
|
||||
* @return a set of the header fields, in order
|
||||
*/
|
||||
public Set<HEADER_FIELDS> getHeaderFields() {
|
||||
Set<HEADER_FIELDS> fields = new LinkedHashSet<HEADER_FIELDS>();
|
||||
for (HEADER_FIELDS field : HEADER_FIELDS.values())
|
||||
fields.add(field);
|
||||
return fields;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the meta data, associated with this header
|
||||
*
|
||||
* @return a set of the meta data
|
||||
*/
|
||||
public Set<VCFHeaderLine> getMetaData() {
|
||||
Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
|
||||
lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
|
||||
lines.addAll(mMetaData);
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotyping sample names
|
||||
*
|
||||
* @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false
|
||||
*/
|
||||
public Set<String> getGenotypeSamples() {
|
||||
return mGenotypeSampleNames;
|
||||
}
|
||||
|
||||
/**
|
||||
* do we have genotyping data?
|
||||
*
|
||||
* @return true if we have genotyping columns, false otherwise
|
||||
*/
|
||||
public boolean hasGenotypingData() {
|
||||
return hasGenotypingData;
|
||||
}
|
||||
|
||||
/** @return the column count */
|
||||
public int getColumnCount() {
|
||||
return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key the header key name
|
||||
* @return the meta data line, or null if there is none
|
||||
*/
|
||||
public VCFInfoHeaderLine getInfoHeaderLine(String key) {
|
||||
return mInfoMetaData.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key the header key name
|
||||
* @return the meta data line, or null if there is none
|
||||
*/
|
||||
public VCFFormatHeaderLine getFormatHeaderLine(String key) {
|
||||
return mFormatMetaData.get(key);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broad.tribble.vcf;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry in the VCF header
|
||||
*/
|
||||
public class VCFHeaderLine implements Comparable {
|
||||
protected static boolean ALLOW_UNBOUND_DESCRIPTIONS = true;
|
||||
protected static String UNBOUND_DESCRIPTION = "Not provided in original VCF header";
|
||||
|
||||
private String mKey = null;
|
||||
private String mValue = null;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF header line
|
||||
*
|
||||
* @param key the key for this header line
|
||||
* @param value the value for this header line
|
||||
*/
|
||||
public VCFHeaderLine(String key, String value) {
|
||||
if ( key == null )
|
||||
throw new IllegalArgumentException("VCFHeaderLine: key cannot be null: key = " + key);
|
||||
mKey = key;
|
||||
mValue = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the key
|
||||
*
|
||||
* @return the key
|
||||
*/
|
||||
public String getKey() {
|
||||
return mKey;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the value
|
||||
*
|
||||
* @return the value
|
||||
*/
|
||||
public String getValue() {
|
||||
return mValue;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return toStringEncoding();
|
||||
}
|
||||
|
||||
/**
|
||||
* Should be overloaded in sub classes to do subclass specific
|
||||
*
|
||||
* @return the string encoding
|
||||
*/
|
||||
protected String toStringEncoding() {
|
||||
return mKey + "=" + mValue;
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if ( !(o instanceof VCFHeaderLine) )
|
||||
return false;
|
||||
return mKey.equals(((VCFHeaderLine)o).getKey()) && mValue.equals(((VCFHeaderLine)o).getValue());
|
||||
}
|
||||
|
||||
public int compareTo(Object other) {
|
||||
return toString().compareTo(other.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param line the line
|
||||
* @return true if the line is a VCF meta data line, or false if it is not
|
||||
*/
|
||||
public static boolean isHeaderLine(String line) {
|
||||
return line != null && line.length() > 0 && VCFHeader.HEADER_INDICATOR.equals(line.substring(0,1));
|
||||
}
|
||||
|
||||
/**
|
||||
* create a string of a mapping pair for the target VCF version
|
||||
* @param keyValues a mapping of the key->value pairs to output
|
||||
* @return a string, correctly formatted
|
||||
*/
|
||||
public static String toStringEncoding(Map<String, ? extends Object> keyValues) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
builder.append("<");
|
||||
boolean start = true;
|
||||
for (Map.Entry<String,?> entry : keyValues.entrySet()) {
|
||||
if (start) start = false;
|
||||
else builder.append(",");
|
||||
|
||||
if ( entry.getValue() == null ) throw new StingException("Header problem: unbound value at " + entry + " from " + keyValues);
|
||||
|
||||
builder.append(entry.getKey());
|
||||
builder.append("=");
|
||||
builder.append(entry.getValue().toString().contains(",") ||
|
||||
entry.getValue().toString().contains(" ") ||
|
||||
entry.getKey().equals("Description") ? "\""+ entry.getValue() + "\"" : entry.getValue());
|
||||
}
|
||||
builder.append(">");
|
||||
return builder.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,116 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A class for translating between vcf header versions
|
||||
*/
|
||||
public class VCFHeaderLineTranslator {
|
||||
private static Map<VCFHeaderVersion,VCFLineParser> mapping;
|
||||
|
||||
static {
|
||||
mapping = new HashMap<VCFHeaderVersion,VCFLineParser>();
|
||||
mapping.put(VCFHeaderVersion.VCF4_0,new VCF4Parser());
|
||||
mapping.put(VCFHeaderVersion.VCF3_3,new VCF3Parser());
|
||||
}
|
||||
|
||||
public static Map<String,String> parseLine(VCFHeaderVersion version, String valueLine, List<String> expectedTagOrder) {
|
||||
return mapping.get(version).parseLine(valueLine,expectedTagOrder);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
interface VCFLineParser {
|
||||
public Map<String,String> parseLine(String valueLine, List<String> expectedTagOrder);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* a class that handles the to and from disk for VCF 4 lines
|
||||
*/
|
||||
class VCF4Parser implements VCFLineParser {
|
||||
Set<String> bracketed = new HashSet<String>();
|
||||
|
||||
/**
|
||||
* parse a VCF4 line
|
||||
* @param valueLine the line
|
||||
* @return a mapping of the tags parsed out
|
||||
*/
|
||||
public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder) {
|
||||
// our return map
|
||||
Map<String, String> ret = new LinkedHashMap<String, String>();
|
||||
|
||||
// a builder to store up characters as we go
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
// store the key when we're parsing out the values
|
||||
String key = "";
|
||||
|
||||
// where are we in the stream of characters?
|
||||
int index = 0;
|
||||
|
||||
// are we inside a quotation? we don't special case ',' then
|
||||
boolean inQuote = false;
|
||||
|
||||
// a little switch machine to parse out the tags. Regex ended up being really complicated and ugly
|
||||
for (char c: valueLine.toCharArray()) {
|
||||
switch (c) {
|
||||
case ('<') : if (index == 0) break; // if we see a open bracket at the beginning, ignore it
|
||||
case ('>') : if (index == valueLine.length()-1) ret.put(key,builder.toString().trim()); break; // if we see a close bracket, and we're at the end, add an entry to our list
|
||||
case ('=') : if (!inQuote) { key = builder.toString().trim(); builder = new StringBuilder(); } else { builder.append(c); } break; // at an equals, copy the key and reset the builder
|
||||
case ('\"') : inQuote = !inQuote; break; // a quote means we ignore ',' in our strings, keep track of it
|
||||
case (',') : if (!inQuote) { ret.put(key,builder.toString().trim()); builder = new StringBuilder(); break; } // drop the current key value to the return map
|
||||
default: builder.append(c); // otherwise simply append to the current string
|
||||
}
|
||||
index++;
|
||||
}
|
||||
|
||||
// validate the tags against the expected list
|
||||
index = 0;
|
||||
if (ret.size() > expectedTagOrder.size()) throw new IllegalArgumentException("Unexpected tag count " + ret.size() + " in string " + expectedTagOrder.size());
|
||||
for (String str : ret.keySet()) {
|
||||
if (!expectedTagOrder.get(index).equals(str)) throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine);
|
||||
index++;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
class VCF3Parser implements VCFLineParser {
|
||||
|
||||
public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder) {
|
||||
// our return map
|
||||
Map<String, String> ret = new LinkedHashMap<String, String>();
|
||||
|
||||
// a builder to store up characters as we go
|
||||
StringBuilder builder = new StringBuilder();
|
||||
|
||||
// where are we in the stream of characters?
|
||||
int index = 0;
|
||||
// where in the expected tag order are we?
|
||||
int tagIndex = 0;
|
||||
|
||||
// are we inside a quotation? we don't special case ',' then
|
||||
boolean inQuote = false;
|
||||
|
||||
// a little switch machine to parse out the tags. Regex ended up being really complicated and ugly
|
||||
for (char c: valueLine.toCharArray()) {
|
||||
switch (c) {
|
||||
case ('\"') : inQuote = !inQuote; break; // a quote means we ignore ',' in our strings, keep track of it
|
||||
case (',') : if (!inQuote) { ret.put(expectedTagOrder.get(tagIndex++),builder.toString()); builder = new StringBuilder(); break; } // drop the current key value to the return map
|
||||
default: builder.append(c); // otherwise simply append to the current string
|
||||
}
|
||||
index++;
|
||||
}
|
||||
ret.put(expectedTagOrder.get(tagIndex++),builder.toString());
|
||||
|
||||
// validate the tags against the expected list
|
||||
index = 0;
|
||||
if (tagIndex != expectedTagOrder.size()) throw new IllegalArgumentException("Unexpected tag count " + tagIndex + ", we expected " + expectedTagOrder.size());
|
||||
for (String str : ret.keySet()){
|
||||
if (!expectedTagOrder.get(index).equals(str)) throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine);
|
||||
index++;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,28 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
/**
|
||||
* the type encodings we use for fields in VCF header lines
|
||||
*/
|
||||
public enum VCFHeaderLineType {
|
||||
Integer, Float, String, Character, Flag;
|
||||
|
||||
public Object convert(String value, VCFCompoundHeaderLine.SupportedHeaderLineType hlt) {
|
||||
switch (this) {
|
||||
case Integer:
|
||||
return Math.round(java.lang.Float.valueOf(value)); // this seems like we're allowing off spec values, but use it for now
|
||||
case Float:
|
||||
return java.lang.Float.valueOf(value);
|
||||
case String:
|
||||
return value;
|
||||
case Character:
|
||||
if (value.length()!= 0)
|
||||
throw new IllegalStateException("INFO_TYPE." + this + " requires fields of length 1, what was provided was " + value);
|
||||
return value;
|
||||
case Flag:
|
||||
if (hlt.allowFlagValues)
|
||||
return value.equals("0") ? false : true;
|
||||
default:
|
||||
throw new IllegalStateException("INFO_TYPE." + this + " doesn't have a set conversion approach");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
/**
|
||||
* information that identifies each header version
|
||||
*/
|
||||
public enum VCFHeaderVersion {
|
||||
VCF3_2("VCRv3.2","format"),
|
||||
VCF3_3("VCFv3.3","fileformat"),
|
||||
VCF4_0("VCFv4.0","fileformat");
|
||||
|
||||
private final String versionString;
|
||||
private final String formatString;
|
||||
|
||||
/**
|
||||
* create the enum, privately, using:
|
||||
* @param vString the version string
|
||||
* @param fString the format string
|
||||
*/
|
||||
VCFHeaderVersion(String vString, String fString) {
|
||||
this.versionString = vString;
|
||||
this.formatString = fString;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the header version
|
||||
* @param version the version string
|
||||
* @param format the format string
|
||||
* @return a VCFHeaderVersion object
|
||||
*/
|
||||
public static VCFHeaderVersion toHeaderVersion(String version, String format) {
|
||||
for (VCFHeaderVersion hv : VCFHeaderVersion.values())
|
||||
if (hv.versionString.equals(version) && hv.formatString.equals(format))
|
||||
return hv;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the header version
|
||||
* @param version the version string
|
||||
* @return a VCFHeaderVersion object
|
||||
*/
|
||||
public static VCFHeaderVersion toHeaderVersion(String version) {
|
||||
for (VCFHeaderVersion hv : VCFHeaderVersion.values())
|
||||
if (hv.versionString.equals(version))
|
||||
return hv;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a valid version string of some type
|
||||
* @param version the version string
|
||||
* @return true if we're valid of some type, false otherwise
|
||||
*/
|
||||
public static boolean isVersionString(String version){
|
||||
return toHeaderVersion(version) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a valid format string for some type
|
||||
* @param format the format string
|
||||
* @return true if we're valid of some type, false otherwise
|
||||
*/
|
||||
public static boolean isFormatString(String format){
|
||||
for (VCFHeaderVersion hv : VCFHeaderVersion.values())
|
||||
if (hv.formatString.equals(format))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
public String getVersionString() {
|
||||
return versionString;
|
||||
}
|
||||
|
||||
public String getFormatString() {
|
||||
return formatString;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFInfoHeaderLine
|
||||
* <p/>
|
||||
* A class representing a key=value entry for INFO fields in the VCF header
|
||||
*/
|
||||
public class VCFInfoHeaderLine extends VCFCompoundHeaderLine {
|
||||
public VCFInfoHeaderLine(String name, int count, VCFHeaderLineType type, String description) {
|
||||
super(name, count, type, description, SupportedHeaderLineType.INFO);
|
||||
}
|
||||
|
||||
protected VCFInfoHeaderLine(String line, VCFHeaderVersion version) {
|
||||
super(line, version, SupportedHeaderLineType.INFO);
|
||||
}
|
||||
|
||||
// info fields allow flag values
|
||||
@Override
|
||||
boolean allowFlagValues() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,30 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broad.tribble.vcf;
|
||||
|
||||
/**
 * An interface for header lines that carry a name/key (implemented by the
 * named header-line types in this package).
 */
public interface VCFNamedHeaderLine {
    /** @return the name (key) of this header line */
    String getName();
}
|
||||
|
|
@ -1,209 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/** The VCFReaderUtils class, which contains a collection of utilities for working with VCF files */
public class VCFReaderUtils {

    // our pattern matching for the genotype mFields
    // group 1: first allele index (or "."), group 2: phase separator ("/" or "|"),
    // group 3: optional second allele index (or ".")
    private static final Pattern gtPattern = Pattern.compile("([0-9\\.]+)([\\\\|\\/])([0-9\\.]*)");

    /**
     * create a VCF header, given an array of strings that all start with at least the # character. This function is
     * package protected so that the VCFReaderUtils can access this function
     *
     * @param headerStrings a list of header strings
     * @param version Header version to parse
     * @return a VCF Header created from the list of strings
     */
    public static VCFHeader createHeader(List<String> headerStrings, VCFHeaderVersion version) {
        Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
        Set<String> auxTags = new LinkedHashSet<String>();
        // iterate over all the passed in strings
        for ( String str : headerStrings ) {
            if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
                // a single-# line: the column header row; validate the fixed columns in order
                String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR);
                int arrayIndex = 0;
                for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
                    try {
                        if (field != VCFHeader.HEADER_FIELDS.valueOf(strings[arrayIndex]))
                            throw new RuntimeException("VCFReaderUtils: we were expecting column name " + field + " but we saw " + strings[arrayIndex]);
                    } catch (IllegalArgumentException e) {
                        // valueOf threw: the token is not a known column name at all
                        throw new RuntimeException("VCFReaderUtils: Unknown column name \"" + strings[arrayIndex] + "\", it does not match a known column header name.");
                    }
                    arrayIndex++;
                }
                // the optional FORMAT column must follow the fixed columns when present
                if ( arrayIndex < strings.length ) {
                    if ( !strings[arrayIndex].equals("FORMAT") )
                        throw new RuntimeException("VCFReaderUtils: we were expecting column name FORMAT but we saw " + strings[arrayIndex]);
                    arrayIndex++;
                }

                // everything after FORMAT is a sample (auxiliary) tag
                while (arrayIndex < strings.length)
                    auxTags.add(strings[arrayIndex++]);

            } else {
                // a ##-prefixed metadata line; dispatch on the known structured prefixes
                if ( str.startsWith("##INFO=") )
                    metaData.add(new VCFInfoHeaderLine(str.substring(7),version));
                else if ( str.startsWith("##FILTER=") )
                    metaData.add(new VCFFilterHeaderLine(str.substring(9),version));
                else if ( str.startsWith("##FORMAT=") )
                    metaData.add(new VCFFormatHeaderLine(str.substring(9),version));
                else {
                    // generic key=value metadata; lines without '=' are silently dropped
                    int equals = str.indexOf("=");
                    if ( equals != -1 )
                        metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));
                }
            }
        }

        return new VCFHeader(metaData, auxTags);
    }

    /**
     * create the next VCFRecord, given the input line
     *
     * @param line the line from the file
     * @param mHeader the VCF header
     *
     * @return the VCFRecord
     */
    public static VCFRecord createRecord(String line, VCFHeader mHeader) {
        return createRecord(line, mHeader, false);
    }

    /**
     * create the next VCFRecord, given the input line
     *
     * @param line the line from the file
     * @param mHeader the VCF header
     * @param ignoreGenotypes when true, skip parsing the per-sample genotype columns
     *
     * @return the VCFRecord
     */
    public static VCFRecord createRecord(String line, VCFHeader mHeader, boolean ignoreGenotypes) {
        // things we need to make a VCF record
        Map<VCFHeader.HEADER_FIELDS, String> values = new HashMap<VCFHeader.HEADER_FIELDS, String>();
        String tokens[] = line.split("\\t");

        // check to ensure that the column count of tokens is right
        if (tokens.length != mHeader.getColumnCount()) {
            throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);
        }

        int index = 0;
        for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields())
            values.put(field, tokens[index++]);
        // if we have genotyping data, we try and extract the genotype fields
        if ( ! ignoreGenotypes && mHeader.hasGenotypingData()) {
            // tokens[index] is the FORMAT column; the sample columns follow it
            String mFormatString = tokens[index];
            String keyStrings[] = mFormatString.split(":");
            List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
            index++;
            String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
            for (String str : mHeader.getGenotypeSamples()) {
                genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
                index++;
            }
            VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);
            // associate the genotypes with this new record
            for ( VCFGenotypeRecord gr : genotypeRecords )
                gr.setVCFRecord(vrec);
            return vrec;

        }
        return new VCFRecord(values);
    }

    /**
     * generate a VCF genotype record, given it's format string, the genotype string, and allele info
     *
     * @param sampleName the sample name
     * @param formatString the format string for this record, which contains the keys for the genotype parameters
     * @param genotypeString contains the phasing information, allele information, and values for genotype parameters
     * @param altAlleles the alternate allele string array, which we index into based on the field parameters
     * @param referenceBase the reference base
     *
     * @return a VCFGenotypeRecord
     */
    public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) {
        return getVCFGenotype(sampleName, formatString.split(":"), genotypeString, altAlleles, referenceBase);
    }

    /**
     * generate a VCF genotype record, given it's format string, the genotype string, and allele info
     *
     * @param sampleName the sample name
     * @param keyStrings the split format string for this record, which contains the keys for the genotype parameters
     * @param genotypeString contains the phasing information, allele information, and values for genotype parameters
     * @param altAlleles the alternate allele string array, which we index into based on the field parameters
     * @param referenceBase the reference base
     *
     * @return a VCFGenotypeRecord
     */
    public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) {
        // parameters to create the VCF genotype record
        HashMap<String, String> tagToValue = new HashMap<String, String>();
        VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNPHASED;
        List<VCFGenotypeEncoding> bases = new ArrayList<VCFGenotypeEncoding>();

        // walk the colon-separated genotype string in lock-step with the format keys,
        // consuming one value per key and shrinking genotypeString as we go
        for (String key : keyStrings) {
            String parse;
            int nextDivider;
            if (!genotypeString.contains(":")) {
                nextDivider = genotypeString.length();
                parse = genotypeString;
            } else {
                // NOTE(review): indexOf can never exceed length(), so the ternary's
                // first branch looks unreachable -- confirm before simplifying
                nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":");
                parse = genotypeString.substring(0, nextDivider);
            }
            if (key.equals(VCFConstants.GENOTYPE_KEY)) {
                // the GT field: decode phase and the one or two allele indices
                Matcher m = gtPattern.matcher(parse);
                if (!m.matches())
                    throw new RuntimeException("VCFReaderUtils: Unable to match GT genotype flag to it's expected pattern, the field was: " + parse);
                phase = VCFGenotypeRecord.determinePhase(m.group(2));
                addAllele(m.group(1), altAlleles, referenceBase, bases);
                if (m.group(3).length() > 0) addAllele(m.group(3), altAlleles, referenceBase, bases);
            } else {
                // any other field: store as key -> value, substituting the per-key
                // missing-value marker for empty values
                if ( parse.length() == 0 )
                    parse = VCFGenotypeRecord.getMissingFieldValue(key);
                tagToValue.put(key, parse);
            }
            if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1;
            genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length());
        }
        // a missing ("." ) leading allele means no call: drop all collected fields
        if ( bases.size() > 0 && bases.get(0).equals(VCFConstants.EMPTY_ALLELE) )
            tagToValue.clear();
        // catch some common errors, either there are too many field keys or there are two many field values
        else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0))
            throw new RuntimeException("VCFReaderUtils: genotype value count doesn't match the key count (expected "
                    + keyStrings.length + " but saw " + tagToValue.size() + ")");
        else if ( genotypeString.length() > 0 )
            throw new RuntimeException("VCFReaderUtils: genotype string contained additional unprocessed fields: " + genotypeString
                    + ". This most likely means that the format string is shorter then the value fields.");

        VCFGenotypeRecord rec = new VCFGenotypeRecord(sampleName, bases, phase);
        for ( Map.Entry<String, String> entry : tagToValue.entrySet() )
            rec.setField(entry.getKey(), entry.getValue());
        return rec;
    }


    /**
     * add an alternate allele to the list of alleles we have for a VCF genotype record
     *
     * @param alleleNumber the allele number, as a string
     * @param altAlleles the list of alternate alleles
     * @param referenceBase the reference base
     * @param bases the list of bases for this genotype call
     */
    private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<VCFGenotypeEncoding> bases) {
        if (alleleNumber.equals(VCFConstants.EMPTY_ALLELE)) {
            bases.add(new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE));
        } else {
            int alleleValue = Integer.valueOf(alleleNumber);
            // check to make sure the allele value is within bounds
            if (alleleValue < 0 || alleleValue > altAlleles.length)
                throw new IllegalArgumentException("VCFReaderUtils: the allele value of " + alleleValue + " is out of bounds given the alternate allele list.");
            // allele 0 is the reference; 1..N index into the alternate allele list
            if (alleleValue == 0)
                bases.add(new VCFGenotypeEncoding(String.valueOf(referenceBase)));
            else
                bases.add(new VCFGenotypeEncoding(altAlleles[alleleValue - 1]));
        }
    }
}
|
||||
|
|
@ -1,652 +0,0 @@
|
|||
package org.broad.tribble.vcf;
|
||||
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/** the basic VCF record type */
|
||||
public class VCFRecord implements Feature {
|
||||
|
||||
// the reference base
|
||||
private String mReferenceBases;
|
||||
// our location
|
||||
private String mContig;
|
||||
private int mPosition;
|
||||
// our id
|
||||
private String mID;
|
||||
// the alternate bases
|
||||
private final List<VCFGenotypeEncoding> mAlts = new ArrayList<VCFGenotypeEncoding>();
|
||||
// our qual value
|
||||
private double mQual;
|
||||
// our filter string
|
||||
private String mFilterString;
|
||||
// our info fields -- use a TreeMap to ensure they can be pulled out in order (so it passes integration tests)
|
||||
private final Map<String, String> mInfoFields = new TreeMap<String, String>();
|
||||
|
||||
// our genotype formatting string
|
||||
private String mGenotypeFormatString;
|
||||
|
||||
// the vcf header we're associated with
|
||||
private VCFHeader vcfHeader = null;
|
||||
|
||||
// our genotype sample fields
|
||||
private final List<VCFGenotypeRecord> mGenotypeRecords = new ArrayList<VCFGenotypeRecord>();
|
||||
|
||||
/**
|
||||
* given a reference base, a location, and the format string, create a VCF record.
|
||||
*
|
||||
* @param referenceBases the reference bases to use
|
||||
* @param contig our contig
|
||||
* @param start the start location
|
||||
* @param genotypeFormatString the format string
|
||||
*/
|
||||
/**
 * Build a VCF record from reference bases, a location, and a genotype format string.
 *
 * @param referenceBases the reference bases to use
 * @param contig the contig name
 * @param start the start position
 * @param genotypeFormatString the FORMAT column string
 */
public VCFRecord(String referenceBases, String contig, int start, String genotypeFormatString) {
    this.setReferenceBase(referenceBases);
    this.setLocation(contig, start);
    this.mGenotypeFormatString = genotypeFormatString;
}
|
||||
|
||||
/**
|
||||
* given the values for each of the columns, create a VCF record.
|
||||
*
|
||||
* @param columnValues a mapping of header strings to values
|
||||
* @param genotypeFormatString the format string for the genotype records
|
||||
* @param genotypeRecords the genotype records
|
||||
*/
|
||||
/**
 * Build a VCF record from parsed column values plus genotype data.
 *
 * @param columnValues mapping of header fields to their string values
 * @param genotypeFormatString the FORMAT column string
 * @param genotypeRecords the per-sample genotype records
 */
public VCFRecord(Map<VCFHeader.HEADER_FIELDS, String> columnValues, String genotypeFormatString, List<VCFGenotypeRecord> genotypeRecords) {
    extractFields(columnValues);
    this.mGenotypeFormatString = genotypeFormatString;
    this.mGenotypeRecords.addAll(genotypeRecords);
}
|
||||
|
||||
/**
|
||||
* given the values for each of the columns, create a VCF record.
|
||||
*
|
||||
* @param columnValues a mapping of header strings to values
|
||||
*/
|
||||
/**
 * Build a genotype-free VCF record from parsed column values.
 *
 * @param columnValues mapping of header fields to their string values
 */
public VCFRecord(Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
    extractFields(columnValues);
    this.mGenotypeFormatString = "";
}
|
||||
|
||||
/**
|
||||
* create a VCF record
|
||||
*
|
||||
* @param referenceBases the reference bases to use
|
||||
* @param contig the contig this variant is on
|
||||
* @param position our position
|
||||
* @param ID our ID string
|
||||
* @param altBases the list of alternate bases
|
||||
* @param qual the qual field
|
||||
* @param filters the filters used on this variant
|
||||
* @param infoFields the information fields
|
||||
* @param genotypeFormatString the format string
|
||||
* @param genotypeObjects the genotype objects
|
||||
*/
|
||||
public VCFRecord(String referenceBases,
                 String contig,
                 long position,
                 String ID,
                 List<VCFGenotypeEncoding> altBases,
                 double qual,
                 String filters,
                 Map<String, String> infoFields,
                 String genotypeFormatString,
                 List<VCFGenotypeRecord> genotypeObjects) {
    // route everything through the setters/adders so their validation
    // (uppercasing, qual range check, alt de-duplication) applies here too
    setReferenceBase(referenceBases);
    setLocation(contig, position);
    this.mID = ID;
    for (VCFGenotypeEncoding alt : altBases)
        this.addAlternateBase(alt);
    this.setQual(qual);
    this.setFilterString(filters);
    this.mInfoFields.putAll(infoFields);
    this.mGenotypeFormatString = genotypeFormatString;
    this.mGenotypeRecords.addAll(genotypeObjects);
}
|
||||
|
||||
/**
 * extract the field values from the passed in array
 *
 * @param columnValues a map of the header fields to values
 */
private void extractFields(Map<VCFHeader.HEADER_FIELDS, String> columnValues) {
    String chrom = null;
    long position = -1;

    for (VCFHeader.HEADER_FIELDS val : columnValues.keySet()) {
        switch (val) {
            case CHROM:
                chrom = columnValues.get(val);
                break;
            case POS:
                // NOTE(review): declared long but parsed with Integer.valueOf --
                // positions beyond Integer.MAX_VALUE throw here; confirm acceptable
                position = Integer.valueOf(columnValues.get(val));
                break;
            case ID:
                setID(columnValues.get(val));
                break;
            case REF:
                // this parser only supports single-base references
                if (columnValues.get(val).length() != 1)
                    throw new IllegalArgumentException("Reference base should be a single character");
                setReferenceBase(columnValues.get(val));
                break;
            case ALT:
                // comma-separated alternate alleles
                String values[] = columnValues.get(val).split(",");
                for (String alt : values)
                    addAlternateBase(new VCFGenotypeEncoding(alt));
                break;
            case QUAL:
                setQual(Double.valueOf(columnValues.get(val)));
                break;
            case FILTER:
                setFilterString(columnValues.get(val));
                break;
            case INFO:
                // semicolon-separated key=value pairs; bare keys get an empty value
                String vals[] = columnValues.get(val).split(";");
                for (String alt : vals) {
                    if ( alt.equals(VCFConstants.EMPTY_INFO_FIELD) )
                        continue;
                    String keyVal[] = alt.split("=");
                    if ( keyVal.length == 1 )
                        addInfoField(keyVal[0], "");
                    else if (keyVal.length == 2)
                        addInfoField(keyVal[0], keyVal[1]);
                    else
                        throw new IllegalArgumentException("info field key-value pair did not parse into key->value pair: " + alt);
                }
                break;
        }
    }
    // set the location last, once both CHROM and POS have been seen
    setLocation(chrom, position);
}
|
||||
|
||||
/**
|
||||
* do we have genotyping data
|
||||
*
|
||||
* @return true if we have genotyping data, false otherwise
|
||||
*/
|
||||
|
||||
public boolean hasGenotypeData() {
|
||||
return (mGenotypeRecords.size() > 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the ID value for this record
|
||||
*/
|
||||
public String getID() {
|
||||
return mID == null ? VCFConstants.EMPTY_ID_FIELD : mID;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference base
|
||||
*
|
||||
* @return either A, T, C, G, or N
|
||||
*/
|
||||
public String getReference() {
|
||||
return mReferenceBases;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the alternate allele strings
|
||||
*
|
||||
* @return an array of strings representing the alt alleles, or null if there are none
|
||||
*/
|
||||
public List<String> getAlternateAlleleList() {
|
||||
ArrayList<String> alts = new ArrayList<String>();
|
||||
for ( VCFGenotypeEncoding alt : mAlts )
|
||||
alts.add(alt.getBases());
|
||||
return alts;
|
||||
}
|
||||
|
||||
public List<VCFGenotypeEncoding> getAlternateAlleles() {
|
||||
return mAlts;
|
||||
}
|
||||
|
||||
public boolean hasAlternateAllele() {
|
||||
for ( VCFGenotypeEncoding alt : mAlts ) {
|
||||
if ( alt.getType() != VCFGenotypeEncoding.TYPE.UNCALLED )
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean isBiallelic() {
|
||||
return getAlternateAlleles().size() == 1;
|
||||
}
|
||||
|
||||
public boolean isReference() {
|
||||
return !hasAlternateAllele();
|
||||
}
|
||||
|
||||
public List<String> getAlleleList() {
|
||||
ArrayList<String> list = new ArrayList<String>();
|
||||
list.add(getReference());
|
||||
list.addAll(getAlternateAlleleList());
|
||||
return list;
|
||||
}
|
||||
|
||||
public double getNonRefAlleleFrequency() {
    // prefer the explicit AF info field when present
    if ( mInfoFields.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) {
        return Double.valueOf(mInfoFields.get(VCFConstants.ALLELE_FREQUENCY_KEY));
    } else {
        // this is the poor man's AF: derive it from AC/AN when both are present
        if ( mInfoFields.containsKey(VCFConstants.ALLELE_COUNT_KEY) && mInfoFields.containsKey(VCFConstants.ALLELE_NUMBER_KEY)) {
            // NOTE(review): only the first AC value is used for multi-allelic sites
            String splt[] = mInfoFields.get(VCFConstants.ALLELE_COUNT_KEY).split(",");
            if ( splt.length > 0 ) {
                return (Double.valueOf(splt[0]) / Double.valueOf(mInfoFields.get(VCFConstants.ALLELE_NUMBER_KEY)));
            }
        }
    }

    // no frequency information available at all
    return 0.0;
}
|
||||
|
||||
public VCFGenotypeEncoding.TYPE getType() {
    // NOTE(review): throws IndexOutOfBoundsException when there are no alternate
    // alleles -- callers appear to rely on at least one alt being present; confirm
    VCFGenotypeEncoding.TYPE type = mAlts.get(0).getType();
    for (int i = 1; i < mAlts.size(); i++) {
        if ( mAlts.get(i).getType() != type )
            return VCFGenotypeEncoding.TYPE.MIXED; // if we have more than one type, return mixed
    }
    return type;
}
|
||||
|
||||
public boolean isDeletion() {
|
||||
return getType() == VCFGenotypeEncoding.TYPE.DELETION;
|
||||
}
|
||||
|
||||
public boolean isInsertion() {
|
||||
return getType() == VCFGenotypeEncoding.TYPE.INSERTION;
|
||||
}
|
||||
|
||||
public boolean isIndel() {
|
||||
return isDeletion() || isInsertion();
|
||||
}
|
||||
|
||||
public boolean isSNP() {
|
||||
return getType() == VCFGenotypeEncoding.TYPE.SINGLE_BASE;
|
||||
}
|
||||
|
||||
public boolean isNovel() {
|
||||
return ( ! isInDBSNP() ) && ( ! isInHapmap() );
|
||||
}
|
||||
|
||||
public boolean isInDBSNP() {
|
||||
return ( ( mID != null && ! mID.equals(".") ) || ( mInfoFields.get(VCFConstants.DBSNP_KEY) != null && mInfoFields.get(VCFConstants.DBSNP_KEY).equals("1") ) );
|
||||
}
|
||||
|
||||
public boolean isInHapmap() {
|
||||
if ( mInfoFields.get(VCFConstants.HAPMAP2_KEY) != null && mInfoFields.get(VCFConstants.HAPMAP2_KEY).equals("1") ) {
|
||||
return true;
|
||||
} else {
|
||||
return ( mInfoFields.get(VCFConstants.HAPMAP3_KEY) != null && mInfoFields.get(VCFConstants.HAPMAP3_KEY).equals("1") );
|
||||
}
|
||||
}
|
||||
|
||||
public char getAlternativeBaseForSNP() {
|
||||
if ( !isSNP() && !isBiallelic() )
|
||||
throw new IllegalStateException("This record does not represent a SNP");
|
||||
return mAlts.get(0).getBases().charAt(0);
|
||||
}
|
||||
|
||||
public char getReferenceForSNP() {
|
||||
if ( !isSNP() )
|
||||
throw new IllegalStateException("This record does not represent a SNP");
|
||||
return getReference().charAt(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the phred-scaled quality score
|
||||
*/
|
||||
public double getQual() {
|
||||
return mQual;
|
||||
}
|
||||
|
||||
public int getPosition() {
|
||||
return mPosition;
|
||||
}
|
||||
|
||||
public boolean isMissingQual() {
|
||||
return VCFConstants.MISSING_GENOTYPE_QUALITY_v3.equals(String.valueOf((int)mQual));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the -log10PError
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return mQual / 10.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the filter criteria
|
||||
*
|
||||
* @return an array of strings representing the filtering criteria, or UNFILTERED if none are applied
|
||||
*/
|
||||
public String[] getFilteringCodes() {
|
||||
if (mFilterString == null) return new String[]{VCFConstants.UNFILTERED};
|
||||
return mFilterString.split(VCFConstants.FILTER_CODE_SEPARATOR);
|
||||
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
String[] codes = getFilteringCodes();
|
||||
return !codes[0].equals(VCFConstants.UNFILTERED) && !codes[0].equals(VCFConstants.PASSES_FILTERS_v3);
|
||||
}
|
||||
|
||||
// public boolean hasFilteringCodes() {
|
||||
// return mFilterString != null;
|
||||
// }
|
||||
|
||||
public String getFilterString() {
|
||||
return mFilterString;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the information key-value pairs as a Map<>
|
||||
*
|
||||
* @return a map, of the info key-value pairs
|
||||
*/
|
||||
public final Map<String, String> getInfoValues() {
|
||||
return mInfoFields;
|
||||
}
|
||||
|
||||
public List<VCFGenotypeRecord> getVCFGenotypeRecords() {
|
||||
return mGenotypeRecords;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a List of the sample names
|
||||
*/
|
||||
public String[] getSampleNames() {
|
||||
String names[] = new String[mGenotypeRecords.size()];
|
||||
for (int i = 0; i < mGenotypeRecords.size(); i++) {
|
||||
names[i] = mGenotypeRecords.get(i).getSampleName();
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
public VCFGenotypeRecord getGenotype(final String sampleName) {
|
||||
for ( VCFGenotypeRecord rec : getVCFGenotypeRecords() ) {
|
||||
if ( rec.getSampleName().equals(sampleName) ) {
|
||||
return rec;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getGenotypeFormatString() {
|
||||
return mGenotypeFormatString;
|
||||
}// the formatting string for our genotype records
|
||||
|
||||
public void setGenotypeFormatString(String newFormatString) {
|
||||
mGenotypeFormatString = newFormatString;
|
||||
}
|
||||
|
||||
public void setReferenceBase(String reference) {
|
||||
mReferenceBases = reference.toUpperCase();
|
||||
}
|
||||
|
||||
public void setLocation(String chrom, long position) {
|
||||
if ( chrom == null )
|
||||
throw new IllegalArgumentException("Chromosomes cannot be missing");
|
||||
if ( position < 0 )
|
||||
throw new IllegalArgumentException("Position values must be greater than 0");
|
||||
this.mContig = chrom;
|
||||
this.mPosition = (int)position;
|
||||
}
|
||||
|
||||
public void setID(String ID) {
|
||||
mID = ID;
|
||||
}
|
||||
|
||||
public void setQual(double qual) {
|
||||
if ( qual < 0 && !VCFConstants.MISSING_GENOTYPE_QUALITY_v3.equals(String.valueOf((int)qual)) )
|
||||
throw new IllegalArgumentException("Qual values cannot be negative unless they are " + VCFConstants.MISSING_GENOTYPE_QUALITY_v3 + " ('unknown')");
|
||||
mQual = qual;
|
||||
}
|
||||
|
||||
public void setFilterString(String filterString) {
|
||||
mFilterString = filterString;
|
||||
}
|
||||
|
||||
public void addGenotypeRecord(VCFGenotypeRecord mGenotypeRecord) {
|
||||
mGenotypeRecords.add(mGenotypeRecord);
|
||||
}
|
||||
|
||||
public void setGenotypeRecords(List<VCFGenotypeRecord> records) {
|
||||
mGenotypeRecords.clear();
|
||||
for ( VCFGenotypeRecord g : records )
|
||||
addGenotypeRecord(g);
|
||||
}
|
||||
|
||||
/**
|
||||
* add an alternate base to our alternate base list. All bases are uppercased
|
||||
* before being added to the list.
|
||||
*
|
||||
* @param base the base to add
|
||||
*/
|
||||
public void addAlternateBase(VCFGenotypeEncoding base) {
|
||||
if (!mAlts.contains(base)) mAlts.add(base);
|
||||
}
|
||||
|
||||
public void setAlternateBases(List<VCFGenotypeEncoding> bases) {
|
||||
mAlts.clear();
|
||||
for ( VCFGenotypeEncoding e : bases )
|
||||
addAlternateBase(e);
|
||||
}
|
||||
|
||||
/**
|
||||
* add an info field to the record
|
||||
*
|
||||
* @param key the key, from the spec or a user created key
|
||||
* @param value it's value as a string
|
||||
*/
|
||||
public void addInfoField(String key, String value) {
|
||||
//System.out.printf("Adding info field %s=%s%n", key, value);
|
||||
mInfoFields.put(key, value);
|
||||
}
|
||||
|
||||
public void printInfoFields() {
|
||||
for ( Map.Entry<String, String> e : mInfoFields.entrySet() ) {
|
||||
System.out.printf(" Current info field %s=%s this=%s%n", e.getKey(), e.getValue(), this);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* add an info field to the record
|
||||
*
|
||||
* @param m A map from info keys to info values
|
||||
*/
|
||||
public void addInfoFields(Map<String,String> m) {
|
||||
for ( Map.Entry<String, String> e : m.entrySet() )
|
||||
addInfoField(e.getKey(), e.getValue());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* the generation of a string representation, which is used by the VCF writer
|
||||
*
|
||||
* @param header the VCF header for this VCF Record
|
||||
* @return a string
|
||||
*/
|
||||
public String toStringEncoding(VCFHeader header) {
    StringBuilder builder = new StringBuilder();

    // CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO
    builder.append(mContig);
    builder.append(VCFConstants.FIELD_SEPARATOR);
    builder.append(mPosition);
    builder.append(VCFConstants.FIELD_SEPARATOR);
    builder.append(getID());
    builder.append(VCFConstants.FIELD_SEPARATOR);
    builder.append(getReference());
    builder.append(VCFConstants.FIELD_SEPARATOR);
    // ALT: comma-joined alternates, or the empty-ALT marker when there are none
    List<VCFGenotypeEncoding> alts = getAlternateAlleles();
    if ( alts.size() > 0 ) {
        builder.append(alts.get(0));
        for ( int i = 1; i < alts.size(); i++ ) {
            builder.append(",");
            builder.append(alts.get(i));
        }
    } else {
        builder.append(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
    }
    builder.append(VCFConstants.FIELD_SEPARATOR);
    // QUAL: the missing marker, or the fixed-precision formatted value
    if ( isMissingQual() )
        builder.append(VCFConstants.MISSING_GENOTYPE_QUALITY_v3);
    else
        builder.append(String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, mQual));
    builder.append(VCFConstants.FIELD_SEPARATOR);
    builder.append(ParsingUtils.join(VCFConstants.FILTER_CODE_SEPARATOR, getFilteringCodes()));
    builder.append(VCFConstants.FIELD_SEPARATOR);
    builder.append(createInfoString());

    // FORMAT and the per-sample columns are appended only when a format string is set
    if ( mGenotypeFormatString != null && mGenotypeFormatString.length() > 0 ) {
        // try {
        addGenotypeData(builder, header);
        // } catch (Exception e) {
        //     if ( validationStringency == VCFGenotypeWriter.VALIDATION_STRINGENCY.STRICT )
        //         throw new RuntimeException(e);
        // }
    }

    return builder.toString();
}
|
||||
|
||||
/**
|
||||
* create the info string
|
||||
*
|
||||
* @return a string representing the infomation fields
|
||||
*/
|
||||
protected String createInfoString() {
|
||||
StringBuffer info = new StringBuffer();
|
||||
boolean isFirst = true;
|
||||
for (Map.Entry<String, String> entry : mInfoFields.entrySet()) {
|
||||
if ( isFirst )
|
||||
isFirst = false;
|
||||
else
|
||||
info.append(VCFConstants.INFO_FIELD_SEPARATOR);
|
||||
info.append(entry.getKey());
|
||||
if ( entry.getValue() != null && !entry.getValue().equals("") ) {
|
||||
info.append("=");
|
||||
info.append(entry.getValue());
|
||||
}
|
||||
}
|
||||
return info.length() == 0 ? VCFConstants.EMPTY_INFO_FIELD : info.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* add the genotype data
|
||||
*
|
||||
* @param builder the string builder
|
||||
* @param header the header object
|
||||
*/
|
||||
private void addGenotypeData(StringBuilder builder, VCFHeader header) {
    // index our genotype records by sample name so we can emit them in header order
    Map<String, VCFGenotypeRecord> gMap = genotypeListToMap(getVCFGenotypeRecords());

    StringBuffer tempStr = new StringBuffer();
    if ( header.getGenotypeSamples().size() < getVCFGenotypeRecords().size() ) {
        // more records than header samples: report the offenders, then fail.
        // NOTE(review): this error path mutates header.getGenotypeSamples() via
        // remove() before throwing -- confirm the header is disposable here
        for ( String sample : gMap.keySet() ) {
            if ( !header.getGenotypeSamples().contains(sample) )
                System.err.println("Sample " + sample + " is a duplicate or is otherwise not present in the header");
            else
                header.getGenotypeSamples().remove(sample);
        }
        throw new IllegalStateException("We have more genotype samples than the header specified; please check that samples aren't duplicated");
    }
    // FORMAT column first, then one column per header sample
    tempStr.append(VCFConstants.FIELD_SEPARATOR + mGenotypeFormatString);

    String[] genotypeFormatStrings = mGenotypeFormatString.split(":");

    for ( String genotype : header.getGenotypeSamples() ) {
        tempStr.append(VCFConstants.FIELD_SEPARATOR);
        if ( gMap.containsKey(genotype) ) {
            VCFGenotypeRecord rec = gMap.get(genotype);
            tempStr.append(rec.toStringEncoding(mAlts, genotypeFormatStrings));
            // consume the record so leftovers can be detected below
            gMap.remove(genotype);
        } else {
            // header sample with no record: emit the empty-genotype encoding
            tempStr.append(VCFGenotypeRecord.stringEncodingForEmptyGenotype(genotypeFormatStrings));
        }
    }
    if ( gMap.size() != 0 ) {
        // records whose samples never appeared in the header
        for ( String sample : gMap.keySet() )
            System.err.println("Sample " + sample + " is being genotyped but isn't in the header.");
        throw new IllegalStateException("We failed to use all the genotype samples; there must be an inconsistancy between the header and records");
    }

    builder.append(tempStr);
}
|
||||
|
||||
/**
|
||||
* compare two VCF records
|
||||
*
|
||||
* @param other the other VCF record
|
||||
* @return true if they're equal
|
||||
*/
|
||||
public boolean equals(VCFRecord other) {
|
||||
if (!this.mAlts.equals(other.mAlts)) return false;
|
||||
if (!this.mReferenceBases.equals(other.mReferenceBases)) return false;
|
||||
if (!this.mContig.equals(other.mContig)) return false;
|
||||
if (mPosition != other.mPosition) return false;
|
||||
if (!this.mID.equals(other.mID)) return false;
|
||||
if (this.mQual != other.mQual) return false;
|
||||
if ( this.mFilterString == null ) {
|
||||
if ( other.mFilterString != null ) return false;
|
||||
} else if ( !this.mFilterString.equals(other.mFilterString) ) return false;
|
||||
if (!this.mInfoFields.equals(other.mInfoFields)) return false;
|
||||
if (!this.mGenotypeRecords.equals(other.mGenotypeRecords)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype mapping from a list and their sample names
|
||||
*
|
||||
* @param list a list of genotype samples
|
||||
* @return a mapping of the sample name to VCF genotype record
|
||||
*/
|
||||
private static Map<String, VCFGenotypeRecord> genotypeListToMap(List<VCFGenotypeRecord> list) {
|
||||
Map<String, VCFGenotypeRecord> map = new HashMap<String, VCFGenotypeRecord>();
|
||||
for (int i = 0; i < list.size(); i++) {
|
||||
VCFGenotypeRecord rec = list.get(i);
|
||||
map.put(rec.getSampleName(), rec);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
/** Returns this feature's reference sequence name, e.g. chromosome or contig. */
public String getChr() {
    return this.mContig;
}
|
||||
|
||||
/** Returns the start position in 1-based coordinates (first base is 1). */
public int getStart() {
    return this.mPosition;
}
|
||||
|
||||
/**
 * Returns the end position following 1-based fully closed conventions. The length of a
 * feature is end - start + 1.
 *
 * NOTE(review): this always returns the start position, i.e. every record is treated as
 * spanning a single base regardless of the reference allele's length — confirm whether
 * multi-base reference alleles (deletions) should extend the end coordinate.
 */
public int getEnd() {
    return this.mPosition;
}
|
||||
|
||||
/**
 * Sets the VCF header we're associated with. Package-private: only the parsing
 * machinery in this package is expected to attach a header.
 *
 * @param header the header
 */
void setHeader(VCFHeader header) {
    vcfHeader = header;
}
|
||||
|
||||
/**
 * Gets the associated header.
 *
 * @return the VCF header, or null if none has been set via setHeader
 */
public VCFHeader getHeader() {
    return vcfHeader;
}
|
||||
}
|
||||
|
|
@ -1,404 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Immutable representation of an allele
|
||||
*
|
||||
* Types of alleles:
|
||||
*
|
||||
* Ref: a t C g a // C is the reference base
|
||||
*
|
||||
* : a t G g a // C base is a G in some individuals
|
||||
*
|
||||
* : a t - g a // C base is deleted w.r.t. the reference
|
||||
*
|
||||
* : a t CAg a // A base is inserted w.r.t. the reference sequence
|
||||
*
|
||||
* In these cases, where are the alleles?
|
||||
*
|
||||
* SNP polymorphism of C/G -> { C , G } -> C is the reference allele
|
||||
* 1 base deletion of C -> { C , - } -> C is the reference allele
|
||||
* 1 base insertion of A -> { - ; A } -> Null is the reference allele
|
||||
*
|
||||
* Suppose I see a the following in the population:
|
||||
*
|
||||
* Ref: a t C g a // C is the reference base
|
||||
* : a t G g a // C base is a G in some individuals
|
||||
* : a t - g a // C base is deleted w.r.t. the reference
|
||||
*
|
||||
* How do I represent this? There are three segregating alleles:
|
||||
*
|
||||
* { C , G , - }
|
||||
*
|
||||
* Now suppose I have this more complex example:
|
||||
*
|
||||
* Ref: a t C g a // C is the reference base
|
||||
* : a t - g a
|
||||
* : a t - - a
|
||||
* : a t CAg a
|
||||
*
|
||||
* There are actually four segregating alleles:
|
||||
*
|
||||
* { C g , - g, - -, and CAg } over bases 2-4
|
||||
*
|
||||
* However, the molecular equivalence explicitly listed above is usually discarded, so the actual
|
||||
* segregating alleles are:
|
||||
*
|
||||
* { C g, g, -, C a g }
|
||||
*
|
||||
* Critically, it should be possible to apply an allele to a reference sequence to create the
|
||||
* correct haplotype sequence:
|
||||
*
|
||||
* Allele + reference => haplotype
|
||||
*
|
||||
* For convenience, we are going to create Alleles where the GenomeLoc of the allele is stored outside of the
|
||||
* Allele object itself. So there's an idea of an A/C polymorphism independent of it's surrounding context.
|
||||
*
|
||||
* Given list of alleles it's possible to determine the "type" of the variation
|
||||
*
|
||||
* A / C @ loc => SNP with
|
||||
* - / A => INDEL
|
||||
*
|
||||
* If you know where allele is the reference, you can determine whether the variant is an insertion or deletion.
|
||||
*
|
||||
* Alelle also supports is concept of a NO_CALL allele. This Allele represents a haplotype that couldn't be
|
||||
* determined. This is usually represented by a '.' allele.
|
||||
*
|
||||
* Note that Alleles store all bases as bytes, in **UPPER CASE**. So 'atc' == 'ATC' from the perspective of an
|
||||
* Allele.
|
||||
|
||||
* @author ebanks, depristo
|
||||
*/
|
||||
public class Allele implements Comparable<Allele> {
    /** Shared zero-length base array used by both the Null and NO_CALL alleles. */
    private static final byte[] EMPTY_ALLELE_BASES = new byte[0];

    private boolean isRef = false;      // true if this allele is tagged as the reference allele
    private boolean isNull = false;     // true if this is the null ('-') allele
    private boolean isNoCall = false;   // true if this is the no-call ('.') allele

    // the segregating DNA bases; length 0 for both Null and NO_CALL alleles
    private byte[] bases = null;

    public final static String NULL_ALLELE_STRING = "-";
    public final static String NO_CALL_STRING = ".";

    // no public way to create an allele; use the create() factory methods below
    private Allele(byte[] bases, boolean isRef) {
        // standardize our representation of null allele and bases
        if ( wouldBeNullAllele(bases) ) {
            bases = EMPTY_ALLELE_BASES;
            isNull = true;
        } else if ( wouldBeNoCallAllele(bases) ) {
            bases = EMPTY_ALLELE_BASES;
            isNoCall = true;
            if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
        }
        // NOTE(review): bases are NOT upper-cased here, even though the class javadoc
        // states alleles are stored in upper case. Lowercase input would therefore not
        // match its uppercase equivalent via basesMatch(byte[]) — confirm callers
        // always supply upper-case bases.

        this.isRef = isRef;
        this.bases = bases;

        if ( ! acceptableAlleleBases(bases,isRef) )
            throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
    }

    private Allele(String bases, boolean isRef) {
        this(bases.getBytes(), isRef);
    }


    // Interned constants so create() can hand out shared instances for all
    // single-base alleles instead of allocating a new object each time.
    private final static Allele REF_A = new Allele("A", true);
    private final static Allele ALT_A = new Allele("A", false);
    private final static Allele REF_C = new Allele("C", true);
    private final static Allele ALT_C = new Allele("C", false);
    private final static Allele REF_G = new Allele("G", true);
    private final static Allele ALT_G = new Allele("G", false);
    private final static Allele REF_T = new Allele("T", true);
    private final static Allele ALT_T = new Allele("T", false);
    private final static Allele REF_N = new Allele("N", true);
    private final static Allele ALT_N = new Allele("N", false);
    private final static Allele REF_NULL = new Allele("-", true);
    private final static Allele ALT_NULL = new Allele("-", false);
    /** A generic static NO_CALL allele for use */
    public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false);

    // ---------------------------------------------------------------------------------------------------------
    //
    // creation routines
    //
    // ---------------------------------------------------------------------------------------------------------

    /**
     * Create a new Allele that includes bases and if tagged as the reference allele if isRef == true. If bases
     * == '-', a Null allele is created. If bases == '.', a no call Allele is created.
     *
     * @param bases the DNA sequence of this variation, '-', of '.'
     * @param isRef should we make this a reference allele?
     * @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformated
     */
    public static Allele create(byte[] bases, boolean isRef) {
        if ( bases == null )
            throw new IllegalArgumentException("create: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");

        if ( bases.length == 1 ) {
            // optimization to return a static constant Allele for each single base object
            switch (bases[0]) {
                case '.':
                    if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
                    return NO_CALL;
                case '-': return isRef ? REF_NULL : ALT_NULL;
                case 'A': return isRef ? REF_A : ALT_A;
                case 'C': return isRef ? REF_C : ALT_C;
                case 'G': return isRef ? REF_G : ALT_G;
                case 'T': return isRef ? REF_T : ALT_T;
                case 'N': return isRef ? REF_N : ALT_N;
                default: throw new IllegalArgumentException("Illegal base: " + (char)bases[0]);
            }
        } else {
            // multi-base (or empty == Null) alleles get a fresh instance
            return new Allele(bases, isRef);
        }
    }

    /** @see #create(byte[], boolean) — single-base convenience overload */
    public static Allele create(byte base, boolean isRef) {
        return create( new byte[]{ base }, isRef);
    }

    /**
     * Returns a new allele consisting of left's bases followed by right, preserving
     * left's reference status. A Null left allele contributes no bases.
     */
    public static Allele extend(Allele left, byte[] right) {
        byte[] bases = null;
        if ( left.length() == 0 )
            bases = right;
        else {
            bases = new byte[left.length() + right.length];
            System.arraycopy(left.getBases(), 0, bases, 0, left.length());
            System.arraycopy(right, 0, bases, left.length(), right.length);
        }

        return create(bases, left.isReference());
    }

    /**
     * @param bases bases representing an allele
     * @return true if the bases represent the null allele ('-' or a zero-length array)
     */
    public static boolean wouldBeNullAllele(byte[] bases) {
        return (bases.length == 1 && bases[0] == '-') || bases.length == 0;
    }

    /**
     * @param bases bases representing an allele
     * @return true if the bases represent the NO_CALL allele ('.')
     */
    public static boolean wouldBeNoCallAllele(byte[] bases) {
        return bases.length == 1 && bases[0] == '.';
    }

    /**
     * @param bases bases representing an allele
     * @param reference is this the reference allele
     * @return true if the bases represent the well formatted allele
     */
    public static boolean acceptableAlleleBases(String bases, boolean reference) {
        return acceptableAlleleBases(bases.getBytes(),reference);
    }

    /**
     * @param bases bases representing an allele
     * @param reference are we the reference (we allow n's in the reference allele)
     * @return true if the bases represent the well formatted allele
     *
     * NOTE(review): the 'reference' flag is currently unused — 'N' is accepted for
     * every allele, not just the reference, despite what the javadoc implies.
     */
    public static boolean acceptableAlleleBases(byte[] bases, boolean reference) {
        // Null and NO_CALL alleles are always well formed
        if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) )
            return true;

        for ( int i = 0; i < bases.length; i++ ) {
            switch (bases[i]) {
                case 'A': case 'C': case 'G': case 'T': case 'N' : break;
                default:
                    return false;
            }
        }

        return true;
    }

    /**
     * @see #create(byte[], boolean)
     *
     * @param bases bases representing an allele
     * @param isRef is this the reference allele?
     */
    public static Allele create(String bases, boolean isRef) {
        return create(bases.getBytes(), isRef);
    }


    /**
     * Creates a non-Ref allele. @see #create(byte[], boolean) for full information
     *
     * @param bases bases representing an allele
     */
    public static Allele create(String bases) {
        return create(bases, false);
    }

    /**
     * Creates a non-Ref allele. @see #create(byte[], boolean) for full information
     *
     * @param bases bases representing an allele
     */
    public static Allele create(byte[] bases) {
        return create(bases, false);
    }

    // ---------------------------------------------------------------------------------------------------------
    //
    // accessor routines
    //
    // ---------------------------------------------------------------------------------------------------------

    // Returns true if this is the null allele
    public boolean isNull() { return isNull; }
    // Returns true if this is not the null allele
    public boolean isNonNull() { return ! isNull(); }

    // Returns true if this is the NO_CALL allele
    public boolean isNoCall() { return isNoCall; }
    // Returns true if this is the not the NO_CALL allele
    public boolean isCalled() { return ! isNoCall(); }

    // Returns true if this Allele is the reference allele
    public boolean isReference() { return isRef; }
    // Returns true if this Allele is not the reference allele
    public boolean isNonReference() { return ! isReference(); }

    // Returns a nice string representation of this object; reference alleles get a trailing '*'
    public String toString() {
        return (isNull() ? "-" : ( isNoCall() ? "." : new String(getBases()))) + (isReference() ? "*" : "");
    }

    /**
     * Return the DNA bases segregating in this allele. Note this isn't reference polarized,
     * so the Null allele is represented by a vector of length 0
     *
     * NOTE(review): this returns the internal array without copying — mutating the
     * result corrupts the (nominally immutable) allele, including shared interned constants.
     *
     * @return the segregating bases
     */
    public byte[] getBases() { return bases; }

    /**
     * @param other the other allele
     *
     * @return true if these alleles are equal (bases AND reference status must match)
     */
    public boolean equals(Object other) {
        return ( ! (other instanceof Allele) ? false : equals((Allele)other, false) );
    }

    /**
     * @return hash code derived from the bases only; consistent with equals since
     *         equal alleles share identical base arrays
     */
    public int hashCode() {
        int hash = 1;
        for (int i = 0; i < bases.length; i++)
            hash += (i+1) * bases[i];
        return hash;
    }

    /**
     * Returns true if this and other are equal. If ignoreRefState is true, then doesn't require both alleles has the
     * same ref tag
     *
     * @param other allele to compare to
     * @param ignoreRefState if true, ignore ref state in comparison
     * @return true if this and other are equal
     */
    public boolean equals(Allele other, boolean ignoreRefState) {
        return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && basesMatch(other.getBases());
    }

    /**
     * @param test bases to test against
     *
     * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
     */
    public boolean basesMatch(byte[] test) { return bases == test || Arrays.equals(bases, test); }

    /**
     * @param test bases to test against; upper-cased before comparison
     *
     * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
     */
    public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }

    /**
     * @param test allele to test against
     *
     * @return true if this Alelle contains the same bases as test, regardless of its reference status; handles Null and NO_CALL alleles
     */
    public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }

    /**
     * @return the length of this allele. Null and NO_CALL alleles have 0 length.
     */
    public int length() {
        return bases.length;
    }

    // ---------------------------------------------------------------------------------------------------------
    //
    // useful static functions
    //
    // ---------------------------------------------------------------------------------------------------------

    /** @see #getMatchingAllele(Collection, byte[]) */
    public static Allele getMatchingAllele(Collection<Allele> allAlleles, String alleleBases) {
        return getMatchingAllele(allAlleles, alleleBases.getBytes());
    }

    /**
     * Finds the allele in allAlleles whose bases match alleleBases; falls back to the
     * shared NO_CALL allele for '.', and returns null when nothing matches.
     */
    public static Allele getMatchingAllele(Collection<Allele> allAlleles, byte[] alleleBases) {
        for ( Allele a : allAlleles ) {
            if ( a.basesMatch(alleleBases) ) {
                return a;
            }
        }

        if ( wouldBeNoCallAllele(alleleBases) )
            return NO_CALL;
        else
            return null; // couldn't find anything
    }

    /**
     * Maps each allele string onto the matching allele from possibleAlleles.
     *
     * @throws IllegalArgumentException if a non-no-call string has no matching allele
     */
    public static List<Allele> resolveAlleles(List<Allele> possibleAlleles, List<String> alleleStrings) {
        List<Allele> myAlleles = new ArrayList<Allele>(alleleStrings.size());

        for ( String alleleString : alleleStrings ) {
            Allele allele = getMatchingAllele(possibleAlleles, alleleString);

            if ( allele == null ) {
                if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) {
                    allele = create(alleleString);
                } else {
                    throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles);
                }
            }

            myAlleles.add(allele);
        }

        return myAlleles;
    }

    /**
     * Orders reference alleles before non-reference ones, then by base string.
     *
     * NOTE(review): Null and NO_CALL alleles both have zero-length bases, so they
     * compare equal here even though equals() distinguishes them — compareTo is not
     * fully consistent with equals.
     */
    public int compareTo(Allele other) {
        if ( isReference() && other.isNonReference() )
            return -1;
        else if ( isNonReference() && other.isReference() )
            return 1;
        else
            return new String(getBases()).compareTo(new String(other.getBases())); // todo -- potential performance issue
    }
}
|
||||
|
|
@ -1,216 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This class encompasses all the basic information about a genotype. It is immutable.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
public class Genotype {
|
||||
|
||||
public final static String PHASED_ALLELE_SEPARATOR = "|";
|
||||
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
|
||||
|
||||
protected InferredGeneticContext commonInfo;
|
||||
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
|
||||
protected List<Allele> alleles = null; // new ArrayList<Allele>();
|
||||
|
||||
private boolean genotypesArePhased = false;
|
||||
private boolean filtersWereAppliedToContext;
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean genotypesArePhased) {
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
||||
filtersWereAppliedToContext = filters != null;
|
||||
this.genotypesArePhased = genotypesArePhased;
|
||||
validate();
|
||||
}
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError) {
|
||||
this(sampleName, alleles, negLog10PError, null, null, false);
|
||||
}
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles) {
|
||||
this(sampleName, alleles, NO_NEG_LOG_10PERROR, null, null, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the alleles for this genotype
|
||||
*/
|
||||
public List<Allele> getAlleles() {
|
||||
return alleles;
|
||||
}
|
||||
|
||||
public List<Allele> getAlleles(Allele allele) {
|
||||
List<Allele> al = new ArrayList<Allele>();
|
||||
for ( Allele a : alleles )
|
||||
if ( a.equals(allele) )
|
||||
al.add(a);
|
||||
|
||||
return Collections.unmodifiableList(al);
|
||||
}
|
||||
|
||||
public Allele getAllele(int i) {
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
||||
public boolean genotypesArePhased() { return genotypesArePhased; }
|
||||
|
||||
/**
|
||||
* @return the ploidy of this genotype
|
||||
*/
|
||||
public int getPloidy() { return alleles.size(); }
|
||||
|
||||
public enum Type {
|
||||
NO_CALL,
|
||||
HOM_REF,
|
||||
HET,
|
||||
HOM_VAR
|
||||
}
|
||||
|
||||
public Type getType() {
|
||||
Allele firstAllele = alleles.get(0);
|
||||
|
||||
if ( firstAllele.isNoCall() ) {
|
||||
return Type.NO_CALL;
|
||||
}
|
||||
|
||||
for (Allele a : alleles) {
|
||||
if ( ! firstAllele.equals(a) )
|
||||
return Type.HET;
|
||||
}
|
||||
return firstAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*/
|
||||
public boolean isHom() { return isHomRef() || isHomVar(); }
|
||||
public boolean isHomRef() { return getType() == Type.HOM_REF; }
|
||||
public boolean isHomVar() { return getType() == Type.HOM_VAR; }
|
||||
|
||||
/**
|
||||
* @return true if we're het (observed alleles differ)
|
||||
*/
|
||||
public boolean isHet() { return getType() == Type.HET; }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
|
||||
*/
|
||||
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
||||
public boolean isCalled() { return getType() != Type.NO_CALL; }
|
||||
|
||||
public void validate() {
|
||||
// todo -- add validation checking here
|
||||
|
||||
if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles");
|
||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles");
|
||||
|
||||
int nNoCalls = 0;
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele == null )
|
||||
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
|
||||
nNoCalls += allele.isNoCall() ? 1 : 0;
|
||||
}
|
||||
if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
||||
throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
||||
}
|
||||
|
||||
public String getGenotypeString() {
|
||||
return getGenotypeString(true);
|
||||
}
|
||||
|
||||
public String getGenotypeString(boolean ignoreRefState) {
|
||||
// Notes:
|
||||
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
|
||||
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
|
||||
// 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course)
|
||||
return Utils.join(genotypesArePhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR,
|
||||
ignoreRefState ? getAlleleStrings() : (genotypesArePhased() ? getAlleles() : Utils.sorted(getAlleles())));
|
||||
}
|
||||
|
||||
private List<String> getAlleleStrings() {
|
||||
List<String> al = new ArrayList<String>();
|
||||
for ( Allele a : alleles )
|
||||
al.add(new String(a.getBases()));
|
||||
|
||||
return al;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("[GT: %s %s %s Q%.2f %s]", getSampleName(), getGenotypeString(false), getType(), getPhredScaledQual(), Utils.sortedString(getAttributes()));
|
||||
}
|
||||
|
||||
public String toBriefString() {
|
||||
return String.format("%s:Q%.2f", getGenotypeString(false), getPhredScaledQual());
|
||||
}
|
||||
|
||||
public boolean sameGenotype(Genotype other) {
|
||||
return sameGenotype(other, true);
|
||||
}
|
||||
|
||||
public boolean sameGenotype(Genotype other, boolean ignorePhase) {
|
||||
if ( getPloidy() != other.getPloidy() )
|
||||
return false; // gotta have the same number of allele to be equal for gods sake
|
||||
|
||||
// algorithms are wildly different if phase is kept of ignored
|
||||
if ( ignorePhase ) {
|
||||
for ( int i = 0; i < getPloidy(); i++) {
|
||||
Allele myAllele = getAllele(i);
|
||||
Allele otherAllele = other.getAllele(i);
|
||||
if ( ! myAllele.basesMatch(otherAllele) )
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
List<Allele> otherAlleles = new ArrayList<Allele>(other.getAlleles());
|
||||
for ( Allele myAllele : getAlleles() ) {
|
||||
Allele alleleToRemove = null;
|
||||
for ( Allele otherAllele : otherAlleles ) {
|
||||
if ( myAllele.basesMatch(otherAllele) ) {
|
||||
alleleToRemove = otherAllele;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( alleleToRemove != null )
|
||||
otherAlleles.remove(alleleToRemove);
|
||||
else
|
||||
return false; // we couldn't find our allele
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// get routines to access context info fields
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
public String getSampleName() { return commonInfo.getName(); }
|
||||
public Set<String> getFilters() { return commonInfo.getFilters(); }
|
||||
public boolean isFiltered() { return commonInfo.isFiltered(); }
|
||||
public boolean isNotFiltered() { return commonInfo.isNotFiltered(); }
|
||||
public boolean filtersWereApplied() { return filtersWereAppliedToContext; }
|
||||
public boolean hasNegLog10PError() { return commonInfo.hasNegLog10PError(); }
|
||||
public double getNegLog10PError() { return commonInfo.getNegLog10PError(); }
|
||||
public double getPhredScaledQual() { return commonInfo.getPhredScaledQual(); }
|
||||
|
||||
public Map<String, Object> getAttributes() { return commonInfo.getAttributes(); }
|
||||
public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); }
|
||||
public Object getAttribute(String key) { return commonInfo.getAttribute(key); }
|
||||
|
||||
public Object getAttribute(String key, Object defaultValue) {
|
||||
return commonInfo.getAttribute(key, defaultValue);
|
||||
}
|
||||
|
||||
public String getAttributeAsString(String key) { return commonInfo.getAttributeAsString(key); }
|
||||
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
|
||||
public int getAttributeAsInt(String key) { return commonInfo.getAttributeAsInt(key); }
|
||||
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
|
||||
public double getAttributeAsDouble(String key) { return commonInfo.getAttributeAsDouble(key); }
|
||||
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||
}
|
||||
|
|
@ -1,223 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* Common utility routines for VariantContext and Genotype
|
||||
*
|
||||
* @author depristo
|
||||
*/
|
||||
final class InferredGeneticContext {
|
||||
public static final double NO_NEG_LOG_10PERROR = -1.0;
|
||||
|
||||
private static Set<String> NO_FILTERS = Collections.unmodifiableSet(new HashSet<String>());
|
||||
private static Map<String, Object> NO_ATTRIBUTES = Collections.unmodifiableMap(new HashMap<String, Object>());
|
||||
|
||||
private double negLog10PError = NO_NEG_LOG_10PERROR;
|
||||
private String name = null;
|
||||
private Set<String> filters = NO_FILTERS;
|
||||
private Map<String, Object> attributes = NO_ATTRIBUTES;
|
||||
|
||||
// public InferredGeneticContext(String name) {
|
||||
// this.name = name;
|
||||
// }
|
||||
//
|
||||
// public InferredGeneticContext(String name, double negLog10PError) {
|
||||
// this(name);
|
||||
// setNegLog10PError(negLog10PError);
|
||||
// }
|
||||
|
||||
/**
 * Fully-specified constructor.
 *
 * @param name           name of the context (sample or record name); must not be null
 * @param negLog10PError -log10 error estimate; validated by setNegLog10PError
 * @param filters        applied filters, or null to leave the shared empty set in place
 * @param attributes     key->value annotations, or null to leave the shared empty map
 */
public InferredGeneticContext(String name, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
    this.name = name;
    setNegLog10PError(negLog10PError);
    // null means "nothing supplied": keep sharing the immutable empty collections
    if ( filters != null )
        setFilters(filters);
    if ( attributes != null )
        setAttributes(attributes);
}
|
||||
|
||||
/**
 * @return the name
 */
public String getName() {
    return name;
}
|
||||
|
||||
/**
 * Sets the name.
 *
 * @param name the name associated with this information; must not be null
 * @throws IllegalArgumentException if name is null
 */
public void setName(String name) {
    if ( name == null ) throw new IllegalArgumentException("Name cannot be null " + this);
    this.name = name;
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Filter
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/** @return an unmodifiable view of the filters applied to this context */
public Set<String> getFilters() {
    return Collections.unmodifiableSet(filters);
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
return filters.size() > 0;
|
||||
}
|
||||
|
||||
/** @return true if no filters have been applied to this context */
public boolean isNotFiltered() {
    return ! isFiltered();
}
|
||||
|
||||
/**
 * Adds a single filter to this context.
 *
 * @param filter the filter name; must be non-null and not already present
 * @throws IllegalArgumentException if filter is null or a duplicate
 */
public void addFilter(String filter) {
    if ( filters == NO_FILTERS ) // immutable -> mutable (copy-on-write of the shared empty set)
        filters = new HashSet<String>(filters);

    if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this);
    if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this);
    filters.add(filter);
}
|
||||
|
||||
/**
 * Adds each filter in the collection via addFilter (so nulls and duplicates throw).
 *
 * @param filters the filters to add; must not be null
 * @throws IllegalArgumentException if the collection or any element is null/duplicate
 */
public void addFilters(Collection<String> filters) {
    if ( filters == null ) throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this);
    for ( String f : filters )
        addFilter(f);
}
|
||||
|
||||
/** Removes all filters, swapping in a mutable set if we still share the immutable empty one. */
public void clearFilters() {
    if ( filters == NO_FILTERS )
        filters = new HashSet<String>();
    else
        filters.clear();
}
|
||||
|
||||
/**
 * Replaces all filters with the given collection.
 *
 * @param filters the new filters; must not be null (enforced by addFilters)
 */
public void setFilters(Collection<String> filters) {
    clearFilters();
    addFilters(filters);
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Working with log error rates
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/** @return true if an error estimate has been set (i.e. differs from the NO_NEG_LOG_10PERROR sentinel) */
public boolean hasNegLog10PError() {
    return getNegLog10PError() != NO_NEG_LOG_10PERROR;
}
|
||||
|
||||
/**
 * @return the -1 * log10-based error estimate, or NO_NEG_LOG_10PERROR if unset
 */
public double getNegLog10PError() { return negLog10PError; }
|
||||
/** @return the error estimate on the Phred scale (-10 * log10 p-error); negative if unset */
public double getPhredScaledQual() { return getNegLog10PError() * 10; }
|
||||
|
||||
/**
 * Sets the -log10 error estimate.
 *
 * @param negLog10PError the estimate; must be >= 0, or exactly NO_NEG_LOG_10PERROR, and finite
 * @throws IllegalArgumentException if the value is negative (other than the sentinel), infinite, or NaN
 */
public void setNegLog10PError(double negLog10PError) {
    if ( negLog10PError < 0 && negLog10PError != NO_NEG_LOG_10PERROR ) throw new IllegalArgumentException("BUG: negLog10PError cannot be < than 0 : " + negLog10PError);
    if ( Double.isInfinite(negLog10PError) ) throw new IllegalArgumentException("BUG: negLog10PError should not be Infinity");
    if ( Double.isNaN(negLog10PError) ) throw new IllegalArgumentException("BUG: negLog10PError should not be NaN");

    this.negLog10PError = negLog10PError;
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Working with attributes
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
public void clearAttributes() {
|
||||
if ( attributes == NO_ATTRIBUTES )
|
||||
attributes = new HashMap<String, Object>();
|
||||
else
|
||||
this.attributes.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the attribute map
|
||||
*/
|
||||
public Map<String, Object> getAttributes() {
|
||||
return Collections.unmodifiableMap(attributes);
|
||||
}
|
||||
|
||||
// todo -- define common attributes as enum
|
||||
|
||||
public void setAttributes(Map<String, ?> map) {
|
||||
clearAttributes();
|
||||
putAttributes(map);
|
||||
}
|
||||
|
||||
public void putAttribute(String key, Object value) {
|
||||
putAttribute(key, value, false);
|
||||
}
|
||||
|
||||
public void putAttribute(String key, Object value, boolean allowOverwrites) {
|
||||
if ( hasAttribute(key) && ! allowOverwrites )
|
||||
throw new StingException("Attempting to overwrite key->value binding: key = " + key + " this = " + this);
|
||||
|
||||
if ( attributes == NO_ATTRIBUTES ) // immutable -> mutable
|
||||
attributes = new HashMap<String, Object>(attributes);
|
||||
|
||||
this.attributes.put(key, value);
|
||||
}
|
||||
|
||||
public void removeAttribute(String key) {
|
||||
if ( attributes == NO_ATTRIBUTES ) // immutable -> mutable
|
||||
attributes = new HashMap<String, Object>(attributes);
|
||||
this.attributes.remove(key);
|
||||
}
|
||||
|
||||
public void putAttributes(Map<String, ?> map) {
|
||||
if ( map != null ) {
|
||||
for ( Map.Entry<String, ?> elt : map.entrySet() ) {
|
||||
putAttribute(elt.getKey(), elt.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean hasAttribute(String key) {
|
||||
return attributes.containsKey(key);
|
||||
}
|
||||
|
||||
public int getNumAttributes() {
|
||||
return attributes.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key the attribute key
|
||||
*
|
||||
* @return the attribute value for the given key (or null if not set)
|
||||
*/
|
||||
public Object getAttribute(String key) {
|
||||
return attributes.get(key);
|
||||
}
|
||||
|
||||
public Object getAttribute(String key, Object defaultValue) {
|
||||
if ( hasAttribute(key) )
|
||||
return attributes.get(key);
|
||||
else
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
// public AttributedObject getAttributes(Collection<Object> keys) {
|
||||
// AttributedObject selected = new AttributedObject();
|
||||
//
|
||||
// for ( Object key : keys )
|
||||
// selected.putAttribute(key, this.getAttribute(key));
|
||||
//
|
||||
// return selected;
|
||||
// }
|
||||
|
||||
public String getAttributeAsString(String key) { return (String.valueOf(getAttribute(key))); }
|
||||
public int getAttributeAsInt(String key) { return (Integer)getAttribute(key); }
|
||||
public double getAttributeAsDouble(String key) { return (Double)getAttribute(key); }
|
||||
|
||||
public String getAttributeAsString(String key, String defaultValue) { return (String)getAttribute(key, defaultValue); }
|
||||
public int getAttributeAsInt(String key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); }
|
||||
public double getAttributeAsDouble(String key, double defaultValue) { return (Double)getAttribute(key, defaultValue); }
|
||||
}
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This class emcompasses all the basic information about a genotype. It is immutable.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
*/
|
||||
public class MutableGenotype extends Genotype {
|
||||
public MutableGenotype(Genotype parent) {
|
||||
super(parent.getSampleName(), parent.getAlleles(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes(), parent.genotypesArePhased());
|
||||
}
|
||||
|
||||
public MutableGenotype(String sampleName, Genotype parent) {
|
||||
super(sampleName, parent.getAlleles(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes(), parent.genotypesArePhased());
|
||||
}
|
||||
|
||||
|
||||
public MutableGenotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean genotypesArePhased) {
|
||||
super(sampleName, alleles, negLog10PError, filters, attributes, genotypesArePhased);
|
||||
}
|
||||
|
||||
public MutableGenotype(String sampleName, List<Allele> alleles, double negLog10PError) {
|
||||
super(sampleName, alleles, negLog10PError);
|
||||
}
|
||||
|
||||
public MutableGenotype(String sampleName, List<Allele> alleles) {
|
||||
super(sampleName, alleles);
|
||||
}
|
||||
|
||||
public Genotype unmodifiableGenotype() {
|
||||
return new Genotype(getSampleName(), getAlleles(), getNegLog10PError(), getFilters(), getAttributes(), genotypesArePhased());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @param alleles list of alleles
|
||||
*/
|
||||
public void setAlleles(List<Allele> alleles) {
|
||||
this.alleles = new ArrayList<Allele>(alleles);
|
||||
|
||||
// todo -- add validation checking here
|
||||
|
||||
if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles");
|
||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles");
|
||||
|
||||
int nNoCalls = 0;
|
||||
for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; }
|
||||
if ( nNoCalls > 0 && nNoCalls != alleles.size() )
|
||||
throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
|
||||
|
||||
for ( Allele allele : alleles )
|
||||
if ( allele == null ) throw new IllegalArgumentException("BUG: Cannot add a null allele to a genotype");
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// InferredGeneticContext mutation operators
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
public void setName(String name) { commonInfo.setName(name); }
|
||||
public void addFilter(String filter) { commonInfo.addFilter(filter); }
|
||||
public void addFilters(Collection<String> filters) { commonInfo.addFilters(filters); }
|
||||
public void clearFilters() { commonInfo.clearFilters(); }
|
||||
public void setFilters(Collection<String> filters) { commonInfo.setFilters(filters); }
|
||||
public void setAttributes(Map<String, ?> map) { commonInfo.setAttributes(map); }
|
||||
public void putAttribute(String key, Object value) { commonInfo.putAttribute(key, value); }
|
||||
public void removeAttribute(String key) { commonInfo.removeAttribute(key); }
|
||||
public void putAttributes(Map<String, ?> map) { commonInfo.putAttributes(map); }
|
||||
public void setNegLog10PError(double negLog10PError) { commonInfo.setNegLog10PError(negLog10PError); }
|
||||
public void putAttribute(String key, Object value, boolean allowOverwrites) { commonInfo.putAttribute(key, value, allowOverwrites); }
|
||||
|
||||
}
|
||||
|
|
@ -1,209 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Mutable version of VariantContext
|
||||
*
|
||||
* @author depristo
|
||||
*/
|
||||
public class MutableVariantContext extends VariantContext {
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public MutableVariantContext(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
|
||||
super(name, loc, alleles, genotypes, negLog10PError, filters, attributes);
|
||||
}
|
||||
|
||||
public MutableVariantContext(String name, GenomeLoc loc, Collection<Allele> alleles, Map<String, Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
|
||||
super(name, loc, alleles, genotypes, negLog10PError, filters, attributes);
|
||||
}
|
||||
|
||||
public MutableVariantContext(String name, GenomeLoc loc, Collection<Allele> alleles) {
|
||||
this(name, loc, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
|
||||
}
|
||||
|
||||
public MutableVariantContext(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes) {
|
||||
this(name, loc, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
|
||||
}
|
||||
|
||||
public MutableVariantContext(VariantContext parent) {
|
||||
this(parent.getName(), parent.getLocation(), parent.getAlleles(), parent.getGenotypes(), parent.getNegLog10PError(), parent.getFilters(), parent.getAttributes());
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the alleles segregating in this context to the collect of alleles. Each of which must be unique according
|
||||
* to equals() in Allele. Validate() should be called when you are done modifying the context.
|
||||
*
|
||||
* @param alleles
|
||||
*/
|
||||
public void setAlleles(Collection<Allele> alleles) {
|
||||
this.alleles.clear();
|
||||
for ( Allele a : alleles )
|
||||
addAllele(a);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds allele to the segregating allele list in this context to the collection of alleles. The new
|
||||
* allele must be be unique according to equals() in Allele.
|
||||
* Validate() should be called when you are done modifying the context.
|
||||
*
|
||||
* @param allele
|
||||
*/
|
||||
public void addAllele(Allele allele) {
|
||||
final boolean allowDuplicates = false; // used to be a parameter
|
||||
|
||||
type = null;
|
||||
|
||||
for ( Allele a : alleles ) {
|
||||
if ( a.basesMatch(allele) && ! allowDuplicates )
|
||||
throw new IllegalArgumentException("Duplicate allele added to VariantContext" + this);
|
||||
}
|
||||
|
||||
// we are a novel allele
|
||||
alleles.add(allele);
|
||||
}
|
||||
|
||||
public void clearGenotypes() {
|
||||
this.genotypes.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this single genotype to the context, not allowing duplicate genotypes to be added
|
||||
* @param genotype
|
||||
*/
|
||||
public void addGenotypes(Genotype genotype) {
|
||||
putGenotype(genotype.getSampleName(), genotype, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds these genotypes to the context, not allowing duplicate genotypes to be added
|
||||
* @param genotypes
|
||||
*/
|
||||
public void addGenotypes(Collection<Genotype> genotypes) {
|
||||
for ( Genotype g : genotypes ) {
|
||||
addGenotype(g);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds these genotype to the context, not allowing duplicate genotypes to be added.
|
||||
* @param genotypes
|
||||
*/
|
||||
public void addGenotypes(Map<String, Genotype> genotypes) {
|
||||
|
||||
for ( Map.Entry<String, Genotype> elt : genotypes.entrySet() ) {
|
||||
addGenotype(elt.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds these genotypes to the context.
|
||||
*
|
||||
* @param genotypes
|
||||
*/
|
||||
public void putGenotypes(Map<String, Genotype> genotypes) {
|
||||
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() )
|
||||
putGenotype(g.getKey(), g.getValue());
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds these genotypes to the context.
|
||||
*
|
||||
* @param genotypes
|
||||
*/
|
||||
public void putGenotypes(Collection<Genotype> genotypes) {
|
||||
for ( Genotype g : genotypes )
|
||||
putGenotype(g);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this genotype to the context, throwing an error if it's already bound.
|
||||
*
|
||||
* @param genotype
|
||||
*/
|
||||
public void addGenotype(Genotype genotype) {
|
||||
addGenotype(genotype.getSampleName(), genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this genotype to the context, throwing an error if it's already bound.
|
||||
*
|
||||
* @param genotype
|
||||
*/
|
||||
public void addGenotype(String sampleName, Genotype genotype) {
|
||||
putGenotype(sampleName, genotype, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this genotype to the context.
|
||||
*
|
||||
* @param genotype
|
||||
*/
|
||||
public void putGenotype(Genotype genotype) {
|
||||
putGenotype(genotype.getSampleName(), genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this genotype to the context.
|
||||
*
|
||||
* @param genotype
|
||||
*/
|
||||
public void putGenotype(String sampleName, Genotype genotype) {
|
||||
putGenotype(sampleName, genotype, true);
|
||||
}
|
||||
|
||||
private void putGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
|
||||
if ( hasGenotype(sampleName) && ! allowOverwrites )
|
||||
throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this);
|
||||
|
||||
if ( ! sampleName.equals(genotype.getSampleName()) )
|
||||
throw new StingException("Sample name doesn't equal genotype.getSample(): " + sampleName + " genotype=" + genotype);
|
||||
|
||||
this.genotypes.put(sampleName, genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the binding from sampleName to genotype. If this doesn't exist, throws an IllegalArgumentException
|
||||
* @param sampleName
|
||||
*/
|
||||
public void removeGenotype(String sampleName) {
|
||||
if ( ! this.genotypes.containsKey(sampleName) )
|
||||
throw new IllegalArgumentException("Sample name isn't contained in genotypes " + sampleName + " genotypes =" + genotypes);
|
||||
|
||||
this.genotypes.remove(sampleName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes genotype from the context. If this doesn't exist, throws an IllegalArgumentException
|
||||
* @param genotype
|
||||
*/
|
||||
public void removeGenotype(Genotype genotype) {
|
||||
removeGenotype(genotype.getSampleName());
|
||||
}
|
||||
|
||||
// todo -- add replace genotype routine
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// InferredGeneticContext mutation operators
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
public void setName(String name) { commonInfo.setName(name); }
|
||||
public void addFilter(String filter) { commonInfo.addFilter(filter); }
|
||||
public void addFilters(Collection<String> filters) { commonInfo.addFilters(filters); }
|
||||
public void clearFilters() { commonInfo.clearFilters(); }
|
||||
public void setFilters(Collection<String> filters) { commonInfo.setFilters(filters); }
|
||||
public void setAttributes(Map<String, ?> map) { commonInfo.setAttributes(map); }
|
||||
public void putAttribute(String key, Object value) { commonInfo.putAttribute(key, value); }
|
||||
public void removeAttribute(String key) { commonInfo.removeAttribute(key); }
|
||||
public void putAttributes(Map<String, ?> map) { commonInfo.putAttributes(map); }
|
||||
public void setNegLog10PError(double negLog10PError) { commonInfo.setNegLog10PError(negLog10PError); }
|
||||
public void putAttribute(String key, Object value, boolean allowOverwrites) { commonInfo.putAttribute(key, value, allowOverwrites); }
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -26,16 +26,59 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
|||
import java.io.Serializable;
|
||||
import java.util.*;
|
||||
import org.apache.commons.jexl2.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broad.tribble.util.variantcontext.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
|
||||
public class VariantContextUtils {
|
||||
final public static JexlEngine engine = new JexlEngine();
|
||||
|
||||
/**
|
||||
* Create a new VariantContext
|
||||
*
|
||||
* @param name name
|
||||
* @param loc location
|
||||
* @param alleles alleles
|
||||
* @param genotypes genotypes set
|
||||
* @param negLog10PError qual
|
||||
* @param filters filters: use null for unfiltered and empty set for passes filters
|
||||
* @param attributes attributes
|
||||
*/
|
||||
public static VariantContext toVC(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
|
||||
return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes != null ? VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new variant context without genotypes and no Perror, no filters, and no attributes
|
||||
* @param name name
|
||||
* @param loc location
|
||||
* @param alleles alleles
|
||||
*/
|
||||
public static VariantContext toVC(String name, GenomeLoc loc, Collection<Allele> alleles) {
|
||||
return new VariantContext (name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, VariantContext.NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new variant context without genotypes and no Perror, no filters, and no attributes
|
||||
* @param name name
|
||||
* @param loc location
|
||||
* @param alleles alleles
|
||||
* @param genotypes genotypes
|
||||
*/
|
||||
public static VariantContext toVC(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes) {
|
||||
return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy constructor
|
||||
*
|
||||
* @param other the VariantContext to copy
|
||||
*/
|
||||
public static VariantContext toVC(VariantContext other) {
|
||||
return new VariantContext(other.getName(), other.getChr(), other.getStart(), other.getEnd(), other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.getFilters(), other.getAttributes());
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple but common wrapper for matching VariantContext objects using JEXL expressions
|
||||
*/
|
||||
|
|
@ -230,7 +273,7 @@ public class VariantContextUtils {
|
|||
// establish the baseline info from the first VC
|
||||
VariantContext first = VCs.get(0);
|
||||
String name = first.getName();
|
||||
GenomeLoc loc = first.getLocation();
|
||||
GenomeLoc loc = getLocation(first);
|
||||
|
||||
Set<Allele> alleles = new TreeSet<Allele>();
|
||||
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
|
||||
|
|
@ -250,11 +293,11 @@ public class VariantContextUtils {
|
|||
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
||||
|
||||
for ( VariantContext vc : VCs ) {
|
||||
if ( loc.getStart() != vc.getLocation().getStart() ) // || !first.getReference().equals(vc.getReference()) )
|
||||
if ( loc.getStart() != vc.getStart() ) // || !first.getReference().equals(vc.getReference()) )
|
||||
throw new StingException("BUG: attempting to merge VariantContexts with different start sites: first="+ first.toString() + " second=" + vc.toString());
|
||||
|
||||
if ( vc.getLocation().size() > loc.size() )
|
||||
loc = vc.getLocation(); // get the longest location
|
||||
if ( getLocation(vc).size() > loc.size() )
|
||||
loc = getLocation(vc); // get the longest location
|
||||
|
||||
nFiltered += vc.isFiltered() ? 1 : 0;
|
||||
nVariant += vc.isVariant() ? 1 : 0;
|
||||
|
|
@ -331,7 +374,7 @@ public class VariantContextUtils {
|
|||
if ( rsID != null )
|
||||
attributes.put(VariantContext.ID_KEY, rsID);
|
||||
|
||||
VariantContext merged = new VariantContext(name, loc, alleles, genotypes, negLog10PError, filters, attributes);
|
||||
VariantContext merged = new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, negLog10PError, filters, attributes);
|
||||
if ( printMessages && remapped ) System.out.printf("Remapped => %s%n", merged);
|
||||
return merged;
|
||||
}
|
||||
|
|
@ -483,7 +526,7 @@ public class VariantContextUtils {
|
|||
g.getFilters(),g.getAttributes(),g.genotypesArePhased()));
|
||||
|
||||
}
|
||||
return new VariantContext(inputVC.getName(), inputVC.getLocation(), alleles, genotypes, inputVC.getNegLog10PError(),
|
||||
return new VariantContext(inputVC.getName(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(),
|
||||
inputVC.getFilters(), attributes);
|
||||
|
||||
}
|
||||
|
|
@ -500,7 +543,7 @@ public class VariantContextUtils {
|
|||
boolean padVC;
|
||||
|
||||
// We need to pad a VC with a common base if the reference allele length is less than the vc location span.
|
||||
long locLength = inputVC.getLocation().size();
|
||||
long locLength = getLocation(inputVC).size();
|
||||
if (refAllele.length() == locLength)
|
||||
padVC = false;
|
||||
else if (refAllele.length() == locLength-1)
|
||||
|
|
@ -552,7 +595,7 @@ public class VariantContextUtils {
|
|||
g.getFilters(),g.getAttributes(),g.genotypesArePhased()));
|
||||
|
||||
}
|
||||
return new VariantContext(inputVC.getName(), inputVC.getLocation(), alleles, genotypes, inputVC.getNegLog10PError(),
|
||||
return new VariantContext(inputVC.getName(), inputVC.getChr(), inputVC.getStart(), inputVC.getEnd(), alleles, genotypes, inputVC.getNegLog10PError(),
|
||||
inputVC.getFilters(), attributes);
|
||||
|
||||
|
||||
|
|
@ -616,19 +659,19 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
public static VariantContext modifyGenotypes(VariantContext vc, Map<String, Genotype> genotypes) {
|
||||
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
|
||||
return new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
|
||||
}
|
||||
|
||||
public static VariantContext modifyLocation(VariantContext vc, GenomeLoc loc) {
|
||||
return new VariantContext(vc.getName(), loc, vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
|
||||
return new VariantContext(vc.getName(), loc.getContig(), loc.getStart(), loc.getStop(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, vc.getAttributes());
|
||||
}
|
||||
|
||||
public static VariantContext modifyFilters(VariantContext vc, Set<String> filters) {
|
||||
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
return new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd() , vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
}
|
||||
|
||||
public static VariantContext modifyAttributes(VariantContext vc, Map<String, Object> attributes) {
|
||||
return new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes);
|
||||
return new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, attributes);
|
||||
}
|
||||
|
||||
public static Genotype modifyName(Genotype g, String name) {
|
||||
|
|
@ -655,4 +698,34 @@ public class VariantContextUtils {
|
|||
|
||||
return VariantContextUtils.modifyGenotypes(vc, newGenotypes);
|
||||
}
|
||||
|
||||
public static BaseUtils.BaseSubstitutionType getSNPSubstitutionType(VariantContext context) {
|
||||
if (!context.isSNP() || !context.isBiallelic())
|
||||
throw new IllegalStateException("Requested SNP substitution type for bialleic non-SNP " + context);
|
||||
return BaseUtils.SNPSubstitutionType(context.getReference().getBases()[0], context.getAlternateAllele(0).getBases()[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* If this is a BiAlleic SNP, is it a transition?
|
||||
*/
|
||||
public static boolean isTransition(VariantContext context) {
|
||||
return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSITION;
|
||||
}
|
||||
|
||||
/**
|
||||
* If this is a BiAlleic SNP, is it a transversion?
|
||||
*/
|
||||
public static boolean isTransversion(VariantContext context) {
|
||||
return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genome location, given a variant context
|
||||
* @param vc the variant context
|
||||
* @return the genomeLoc
|
||||
*/
|
||||
public static final GenomeLoc getLocation(VariantContext vc) {
|
||||
return GenomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -25,7 +25,8 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
|||
|
||||
import org.apache.commons.jexl2.JexlContext;
|
||||
import org.apache.commons.jexl2.MapContext;
|
||||
//import org.apache.commons.jexl2.JexlHelper;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
|
|
@ -58,8 +59,8 @@ class VariantJEXLContext implements JexlContext {
|
|||
private static Map<String, AttributeGetter> x = new HashMap<String, AttributeGetter>();
|
||||
|
||||
static {
|
||||
x.put("CHROM", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getLocation().getContig(); }});
|
||||
x.put("POS", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getLocation().getStart(); }});
|
||||
x.put("CHROM", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getChr(); }});
|
||||
x.put("POS", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getStart(); }});
|
||||
x.put("TYPE", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getType().toString(); }});
|
||||
x.put("QUAL", new AttributeGetter() { public Object get(VariantContext vc) { return 10 * vc.getNegLog10PError(); }});
|
||||
x.put("ALLELES", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getAlleles(); }});
|
||||
|
|
@ -164,8 +165,8 @@ class JEXLMap implements Map<VariantContextUtils.JexlVCMatchExp, Boolean> {
|
|||
|
||||
if ( vc != null ) {
|
||||
// create a mapping of what we know about the variant context, its Chromosome, positions, etc.
|
||||
infoMap.put("CHROM", vc.getLocation().getContig());
|
||||
infoMap.put("POS", String.valueOf(vc.getLocation().getStart()));
|
||||
infoMap.put("CHROM", VariantContextUtils.getLocation(vc).getContig());
|
||||
infoMap.put("POS", String.valueOf(VariantContextUtils.getLocation(vc).getStart()));
|
||||
infoMap.put("TYPE", vc.getType().toString());
|
||||
infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual()));
|
||||
|
||||
|
|
|
|||
|
|
@ -28,8 +28,8 @@ package org.broadinstitute.sting.gatk.io.storage;
|
|||
import java.io.*;
|
||||
import java.util.Set;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
|
|
|||
|
|
@ -28,9 +28,9 @@ package org.broadinstitute.sting.gatk.io.stubs;
|
|||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
|
|
|||
|
|
@ -1,36 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
public class HapMapROD extends TabularROD
|
||||
{
|
||||
public HapMapROD(final String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
// For converting from Hg18 to b36 format:
|
||||
// return GenomeLocParser.createGenomeLoc(this.get("chrom").replaceAll("chr", ""), Long.parseLong(this.get("pos")));
|
||||
return GenomeLocParser.createGenomeLoc(this.get("chrom"), Long.parseLong(this.get("pos")));
|
||||
}
|
||||
|
||||
public String[] getSampleIDs() {
|
||||
ArrayList<String> header = getHeader();
|
||||
String[] sample_ids = new String[header.size()-11];
|
||||
for (int i = 11; i < header.size(); i++)
|
||||
sample_ids[i-11] = header.get(i);
|
||||
return sample_ids;
|
||||
}
|
||||
|
||||
public String[] getGenotypes() {
|
||||
ArrayList<String> header = getHeader();
|
||||
String[] genotypes = new String[header.size()-11];
|
||||
for (int i = 11; i < header.size(); i++)
|
||||
genotypes[i-11] = get(header.get(i));
|
||||
return genotypes;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
|
|
|
|||
|
|
@ -4,12 +4,14 @@ import edu.mit.broad.picard.genotype.DiploidGenotype;
|
|||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||
import org.broad.tribble.hapmap.HapMapFeature;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.MutableGenotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
|
||||
|
|
@ -42,9 +44,8 @@ public class VariantContextAdaptors {
|
|||
|
||||
static {
|
||||
adaptors.put(DbSNPFeature.class, new DBSnpAdaptor());
|
||||
adaptors.put(VCFRecord.class, new VCFRecordAdaptor());
|
||||
adaptors.put(PlinkRod.class, new PlinkRodAdaptor());
|
||||
adaptors.put(HapMapROD.class, new HapMapAdaptor());
|
||||
adaptors.put(HapMapFeature.class, new HapMapAdaptor());
|
||||
adaptors.put(GeliTextFeature.class, new GeliTextAdaptor());
|
||||
adaptors.put(rodGELI.class, new GeliAdaptor());
|
||||
adaptors.put(VariantContext.class, new VariantContextAdaptor());
|
||||
|
|
@ -110,128 +111,14 @@ public class VariantContextAdaptors {
|
|||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
|
||||
Collection<Genotype> genotypes = null;
|
||||
VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(dbsnp.getChr(),dbsnp.getStart(),dbsnp.getEnd()), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
|
||||
VariantContext vc = new VariantContext(name, dbsnp.getChr(),dbsnp.getStart(),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
}
|
||||
}
|
||||
|
||||
private static class VCFRecordAdaptor extends VCAdaptor {
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
return vcfToVariantContext(name, (VCFRecord)input, ref);
|
||||
}
|
||||
}
|
||||
|
||||
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf, ReferenceContext ref) {
|
||||
if ( vcf.isReference() || vcf.isSNP() || vcf.isIndel() ) {
|
||||
// add the reference allele
|
||||
if ( ! Allele.acceptableAlleleBases(vcf.getReference(),true) ) {
|
||||
System.out.printf("Excluding vcf record %s%n", vcf);
|
||||
return null;
|
||||
}
|
||||
|
||||
Set<String> filters = vcf.isFiltered() ? new HashSet<String>(Arrays.asList(vcf.getFilteringCodes())) : null;
|
||||
Map<String, String> attributes = new HashMap<String, String>(vcf.getInfoValues());
|
||||
attributes.put(VariantContext.ID_KEY, vcf.getID());
|
||||
|
||||
// add all of the alt alleles
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
Allele refAllele = determineRefAllele(vcf, ref);
|
||||
alleles.add(refAllele);
|
||||
|
||||
for ( VCFGenotypeEncoding alt : vcf.getAlternateAlleles() ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt.getBases(),false) ) {
|
||||
//System.out.printf("Excluding vcf record %s%n", vcf);
|
||||
return null;
|
||||
}
|
||||
|
||||
Allele allele;
|
||||
// special case: semi-deletion
|
||||
if ( vcf.isDeletion() && refAllele.length() > alt.getLength() ) {
|
||||
byte[] semiDeletion = new byte[refAllele.length() - alt.getLength()];
|
||||
System.arraycopy(ref.getBases(), alt.getLength(), semiDeletion, 0, refAllele.length() - alt.getLength());
|
||||
allele = Allele.create(new String(semiDeletion), false);
|
||||
} else {
|
||||
allele = Allele.create(alt.getBases(), false);
|
||||
}
|
||||
if ( ! allele.isNoCall() )
|
||||
alleles.add(allele);
|
||||
}
|
||||
|
||||
Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
||||
for ( VCFGenotypeRecord vcfG : vcf.getVCFGenotypeRecords() ) {
|
||||
List<Allele> genotypeAlleles = new ArrayList<Allele>();
|
||||
for ( VCFGenotypeEncoding s : vcfG.getAlleles() ) {
|
||||
Allele a = Allele.getMatchingAllele(alleles, s.getBases());
|
||||
if ( a == null ) {
|
||||
if ( vcf.isIndel() )
|
||||
genotypeAlleles.add(refAllele);
|
||||
else
|
||||
throw new StingException("Invalid VCF genotype allele " + s + " in VCF " + vcf);
|
||||
} else {
|
||||
genotypeAlleles.add(a);
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, String> fields = new HashMap<String, String>();
|
||||
for ( Map.Entry<String, String> e : vcfG.getFields().entrySet() ) {
|
||||
// todo -- fixme if we put GQ and FT into key itself
|
||||
if ( ! e.getKey().equals(VCFConstants.GENOTYPE_QUALITY_KEY) && ! e.getKey().equals(VCFConstants.GENOTYPE_FILTER_KEY) )
|
||||
fields.put(e.getKey(), e.getValue());
|
||||
}
|
||||
|
||||
Set<String> genotypeFilters = new HashSet<String>();
|
||||
if ( vcfG.isFiltered() ) // setup the genotype filter fields
|
||||
genotypeFilters.addAll(Arrays.asList(vcfG.getFields().get(VCFConstants.GENOTYPE_FILTER_KEY).split(";")));
|
||||
|
||||
double qual = vcfG.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError();
|
||||
Genotype g = new Genotype(vcfG.getSampleName(), genotypeAlleles, qual, genotypeFilters, fields, vcfG.getPhaseType() == VCFGenotypeRecord.PHASE.PHASED);
|
||||
genotypes.put(g.getSampleName(), g);
|
||||
}
|
||||
|
||||
double qual = vcf.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcf.getNegLog10PError();
|
||||
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(vcf.getChr(),vcf.getStart());
|
||||
if ( vcf.isDeletion() )
|
||||
loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1);
|
||||
|
||||
VariantContext vc = new VariantContext(name, loc, alleles, genotypes, qual, filters, attributes);
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
}
|
||||
|
||||
private static Allele determineRefAllele(VCFRecord vcf, ReferenceContext ref) {
|
||||
if ( ref == null )
|
||||
throw new StingException("Illegal determineRefAllele call!");
|
||||
|
||||
Allele refAllele;
|
||||
if ( vcf.isInsertion() ) {
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
// } else if ( ref == null ) {
|
||||
// refAllele = Allele.create(vcf.getReference(), true);
|
||||
} else if ( !vcf.isIndel() ) {
|
||||
refAllele = Allele.create(ref.getBase(), true);
|
||||
if ( (char)ref.getBase() != vcf.getReference().charAt(0) )
|
||||
throw new StingException("The VCF reference base (" + vcf.getReference().charAt(0) + ") doesn't match the actual reference base (" + (char)ref.getBase() + "); please check that you are using the appropriate reference file");
|
||||
} else if ( vcf.isDeletion() ) {
|
||||
int start = vcf.getPosition() - (int)ref.getWindow().getStart() + 1;
|
||||
int delLength = 0;
|
||||
for ( VCFGenotypeEncoding enc : vcf.getAlternateAlleles() ) {
|
||||
if ( enc.getLength() > delLength )
|
||||
delLength = enc.getLength();
|
||||
}
|
||||
if ( delLength > ref.getWindow().getStop() - vcf.getPosition() )
|
||||
throw new IllegalArgumentException("Length of deletion is larger than reference context provided at " + ref.getLocus());
|
||||
|
||||
refAllele = deletionAllele(ref, start, delLength);
|
||||
} else {
|
||||
throw new UnsupportedOperationException("Conversion of VCF type " + vcf.getType() + " is not supported.");
|
||||
}
|
||||
|
||||
return refAllele;
|
||||
}
|
||||
|
||||
private static Allele deletionAllele(ReferenceContext ref, int start, int len) {
|
||||
byte[] deletion = new byte[len];
|
||||
|
|
@ -314,7 +201,7 @@ public class VariantContextAdaptors {
|
|||
// create the variant context
|
||||
try {
|
||||
GenomeLoc loc = GenomeLocParser.setStop(plink.getLocation(), plink.getLocation().getStop() + plink.getLength()-1);
|
||||
VariantContext vc = new VariantContext(plink.getVariantName(), loc, VCAlleles, genotypes);
|
||||
VariantContext vc = VariantContextUtils.toVC(plink.getVariantName(), loc, VCAlleles, genotypes);
|
||||
return vc;
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file");
|
||||
|
|
@ -399,7 +286,7 @@ public class VariantContextAdaptors {
|
|||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
alleles.add(refAllele);
|
||||
VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes);
|
||||
VariantContext vc = VariantContextUtils.toVC(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes);
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
|
|
@ -472,7 +359,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
VariantContext vc = new VariantContext(name, ((rodGELI) input).getLocation(), alleles, genotypes, geli.getBestToReferenceLod(), null, attributes);
|
||||
VariantContext vc = VariantContextUtils.toVC(name, ((rodGELI) input).getLocation(), alleles, genotypes, geli.getBestToReferenceLod(), null, attributes);
|
||||
return vc;
|
||||
|
||||
}
|
||||
|
|
@ -506,7 +393,7 @@ public class VariantContextAdaptors {
|
|||
if ( ref == null )
|
||||
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context");
|
||||
|
||||
HapMapROD hapmap = (HapMapROD)input;
|
||||
HapMapFeature hapmap = (HapMapFeature)input;
|
||||
|
||||
// add the reference allele
|
||||
HashSet<Allele> alleles = new HashSet<Allele>();
|
||||
|
|
@ -539,7 +426,7 @@ public class VariantContextAdaptors {
|
|||
genotypes.put(samples[i], g);
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContext(name, hapmap.getLocation(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, new HashMap<String, String>());
|
||||
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), hapmap.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, new HashMap<String, String>());
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,15 +27,11 @@ package org.broadinstitute.sting.gatk.refdata.features.beagle;
|
|||
import org.broad.tribble.Feature;
|
||||
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
|
||||
public class BeagleFeature implements Feature {
|
||||
|
||||
|
|
|
|||
|
|
@ -1,602 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.features.vcf4;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.exception.CodecLineParsingException;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* a feature codec for the VCF 4 specification. Our aim is to read in the records and convert to VariantContext as
|
||||
* quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
|
||||
*/
|
||||
public class VCF4Codec implements FeatureCodec, NameAwareCodec {
|
||||
|
||||
|
||||
// we have to store the list of strings that make up the header until they're needed
|
||||
private VCFHeader header = null;
|
||||
|
||||
private VCFHeaderVersion version = VCFHeaderVersion.VCF4_0;
|
||||
// used to convert the index of the alternate allele in genotypes to a integer index
|
||||
private static int ZERO_CHAR = (byte)'0';
|
||||
|
||||
// a mapping of the allele
|
||||
private static Map<String, List<Allele>> alleleMap = new HashMap<String, List<Allele>>(3);
|
||||
|
||||
// cache the genotyope values
|
||||
private static String[] GTValueArray = new String[100];
|
||||
|
||||
// for performance testing purposes
|
||||
public static boolean validate = true;
|
||||
|
||||
// a key optimization -- we need a per thread string parts array, so we don't allocate a big array over and over
|
||||
// todo: make this thread safe?
|
||||
private String[] parts = null;
|
||||
|
||||
// for performance we cache the hashmap of filter encodings for quick lookup
|
||||
private HashMap<String,LinkedHashSet<String>> filterHash = new HashMap<String,LinkedHashSet<String>>();
|
||||
|
||||
// a set of the genotype keys?
|
||||
private String[] genotypeKeyArray = new String[100];
|
||||
|
||||
// a mapping of the VCF fields to their type, filter fields, and format fields, for quick lookup to validate against
|
||||
TreeMap<String, VCFHeaderLineType> infoFields = new TreeMap<String, VCFHeaderLineType>();
|
||||
TreeMap<String, VCFHeaderLineType> formatFields = new TreeMap<String, VCFHeaderLineType>();
|
||||
ArrayList<String> filterFields = new ArrayList<String>();
|
||||
|
||||
// do we want to validate the info, format, and filter fields
|
||||
private final boolean validateFromHeader = false;
|
||||
|
||||
// we store a name to give to each of the variant contexts we emit
|
||||
private String name = "Unknown";
|
||||
|
||||
private int lineNo = 0;
|
||||
|
||||
// some classes need to transform the line before
|
||||
private LineTransform transformer = null;
|
||||
|
||||
/**
|
||||
* @param reader the line reader to take header lines from
|
||||
* @return the number of header lines
|
||||
*/
|
||||
@Override
|
||||
public Object readHeader(LineReader reader) {
|
||||
List<String> headerStrings = new ArrayList<String>();
|
||||
|
||||
String line;
|
||||
try {
|
||||
boolean foundHeaderVersion = false;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
lineNo++;
|
||||
if (line.startsWith(VCFHeader.METADATA_INDICATOR)) {
|
||||
String[] lineFields = line.substring(2).split("=");
|
||||
if (lineFields.length == 2 &&
|
||||
VCFHeaderVersion.isVersionString(lineFields[1]) && VCFHeaderVersion.isFormatString(lineFields[0])) {
|
||||
foundHeaderVersion = true;
|
||||
this.version = VCFHeaderVersion.toHeaderVersion(lineFields[1]);
|
||||
}
|
||||
headerStrings.add(line);
|
||||
}
|
||||
else if (line.startsWith(VCFHeader.HEADER_INDICATOR)) {
|
||||
if (!foundHeaderVersion) {
|
||||
throw new CodecLineParsingException("We never saw a header line specifying VCF version");
|
||||
}
|
||||
return createHeader(headerStrings, line);
|
||||
}
|
||||
else {
|
||||
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
|
||||
}
|
||||
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("IO Exception ", e);
|
||||
}
|
||||
throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file");
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF header
|
||||
* @param headerStrings a list of strings that represent all the ## entries
|
||||
* @param line the single # line (column names)
|
||||
* @return the count of header lines
|
||||
*/
|
||||
public Object createHeader(List<String> headerStrings, String line) {
|
||||
headerStrings.add(line);
|
||||
header = VCFReaderUtils.createHeader(headerStrings, this.version);
|
||||
|
||||
// setup our look-up lists for validation
|
||||
for ( VCFHeaderLine hl : header.getMetaData() ) {
|
||||
if ( hl instanceof VCFFilterHeaderLine )
|
||||
this.filterFields.add(((VCFFilterHeaderLine)hl).getName());
|
||||
if ( hl instanceof VCFFormatHeaderLine )
|
||||
this.formatFields.put(((VCFFormatHeaderLine)hl).getName(), ((VCFFormatHeaderLine)hl).getType());
|
||||
if ( hl instanceof VCFInfoHeaderLine )
|
||||
this.infoFields.put(((VCFInfoHeaderLine)hl).getName(), ((VCFInfoHeaderLine)hl).getType());
|
||||
}
|
||||
// sort the lists so we can binary search them later on
|
||||
Collections.sort(filterFields);
|
||||
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* the fast decode function
|
||||
* @param line the line of text for the record
|
||||
* @return a feature, (not guaranteed complete) that has the correct start and stop
|
||||
*/
|
||||
public Feature decodeLoc(String line) {
|
||||
return reallyDecode(line, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* decode the line into a feature (VariantContext)
|
||||
* @param line the line
|
||||
* @return a VariantContext
|
||||
*/
|
||||
public Feature decode(String line) {
|
||||
return reallyDecode(line, true);
|
||||
}
|
||||
|
||||
private Feature reallyDecode(String line, boolean parseGenotypes) {
|
||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
||||
|
||||
if (parts == null)
|
||||
parts = new String[header.getColumnCount()];
|
||||
|
||||
int nParts = ParsingUtils.split(line, parts, VCFConstants.FIELD_SEPARATOR.charAt(0));
|
||||
|
||||
// our header cannot be null, we need the genotype sample names and counts
|
||||
if (header == null) throw new IllegalStateException("VCF Header cannot be null");
|
||||
|
||||
// check to make sure the split resulted in the correct number of fields (8 + (1 + genotytpe counts if it has genotypes)
|
||||
if (nParts != header.getColumnCount())
|
||||
throw new IllegalArgumentException("we expected " + header.getColumnCount() + " columns and we got " + nParts + " for line " + line);
|
||||
|
||||
return parseVCFLine(parts, parseGenotypes);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a an allele from an index and an array of alleles
|
||||
* @param index the index
|
||||
* @param alleles the alleles
|
||||
* @return an Allele
|
||||
*/
|
||||
private static Allele oneAllele(char index, List<Allele> alleles) {
|
||||
if ( index == VCFConstants.EMPTY_ALLELE.charAt(0) )
|
||||
return Allele.NO_CALL;
|
||||
int i = ((byte)index) - ZERO_CHAR;
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* parse genotype alleles from the genotype string
|
||||
* @param GT GT string
|
||||
* @param alleles list of possible alleles
|
||||
* @param cache cache of alleles for GT
|
||||
* @return the allele list for the GT string
|
||||
*/
|
||||
private List<Allele> parseGenotypeAlleles(String GT, List<Allele> alleles, Map<String, List<Allele>> cache) {
|
||||
// this should cache results [since they are immutable] and return a single object for each genotype
|
||||
if ( GT.length() != 3 && GT.length() != 1 )
|
||||
throw new VCFParserException("Unreasonable number of alleles: " + "GT=" + GT + " length=" + GT.length()); // 0/1 => barf on 10/0
|
||||
|
||||
List<Allele> GTAlleles = cache.get(GT);
|
||||
|
||||
if ( GTAlleles == null ) {
|
||||
Allele allele1 = oneAllele(GT.charAt(0), alleles);
|
||||
GTAlleles = GT.length() == 3 ? Arrays.asList(allele1, oneAllele(GT.charAt(2), alleles)) : Arrays.asList(allele1);
|
||||
cache.put(GT, GTAlleles);
|
||||
}
|
||||
|
||||
return GTAlleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* parse out the info fields
|
||||
* @param infoField the fields
|
||||
* @param id the indentifier
|
||||
* @return a mapping of keys to objects
|
||||
*/
|
||||
private Map<String, Object> parseInfo(String infoField, String id) {
|
||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) {
|
||||
for ( String field : Utils.split(infoField, VCFConstants.INFO_FIELD_SEPARATOR) ) {
|
||||
String key;
|
||||
Object value;
|
||||
|
||||
int eqI = field.indexOf("=");
|
||||
if ( eqI != -1 ) {
|
||||
key = field.substring(0, eqI);
|
||||
String str = field.substring(eqI+1, field.length());
|
||||
|
||||
// lets see if the string contains a , separator
|
||||
if ( str.contains(",") )
|
||||
value = Arrays.asList(str.split(","));
|
||||
else
|
||||
value = str;
|
||||
} else {
|
||||
key = field;
|
||||
value = new Boolean(true);
|
||||
}
|
||||
|
||||
attributes.put(key, value);
|
||||
}
|
||||
}
|
||||
// validate the fields
|
||||
validateFields(attributes.keySet(), new ArrayList<String>(infoFields.keySet()));
|
||||
|
||||
attributes.put(VariantContext.ID_KEY, id);
|
||||
return attributes;
|
||||
}
|
||||
|
||||
/**
|
||||
* validate the attributes against the stored fields of the appopriate type
|
||||
* @param attributes the list of fields to check for inclusion against the field array
|
||||
* @param fields the master list; all attributes must be in this list to validate
|
||||
*/
|
||||
private void validateFields(Set<String> attributes, List<String> fields) {
|
||||
// validate the info fields
|
||||
if (validateFromHeader) {
|
||||
for (String attr : attributes)
|
||||
if (Collections.binarySearch(fields,attr) < 0)
|
||||
throw new VCFParserException("Unable to find field describing attribute " + attr);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* parse out the qual value
|
||||
* @param qualString the quality string
|
||||
* @return return a double
|
||||
*/
|
||||
private Double parseQual(String qualString) {
|
||||
if ( qualString.equals(VCFConstants.MISSING_VALUE_v4) || qualString.equals(VCFConstants.MISSING_QUALITY_v3) )
|
||||
return VariantContext.NO_NEG_LOG_10PERROR;
|
||||
return Double.valueOf(qualString) / 10.0;
|
||||
}
|
||||
|
||||
/**
|
||||
* parse out the alleles
|
||||
* @param ref the reference base
|
||||
* @param alts a string of alternates to break into alleles
|
||||
* @return a list of alleles, and a pair of the shortest and longest sequence
|
||||
*/
|
||||
private List<Allele> parseAlleles(String ref, String alts) {
|
||||
List<Allele> alleles = new ArrayList<Allele>(2); // we are almost always biallelic
|
||||
// ref
|
||||
if (!checkAllele(ref, true))
|
||||
throw new VCFParserException("Unable to parse out correct reference allele, we saw = " + ref);
|
||||
Allele refAllele = Allele.create(ref, true);
|
||||
alleles.add(refAllele);
|
||||
|
||||
if ( alts.indexOf(",") == -1 ) // only 1 alternatives, don't call string split
|
||||
parseSingleAllele(alleles, alts, false);
|
||||
else
|
||||
for ( String alt : Utils.split(alts, ",") )
|
||||
parseSingleAllele(alleles, alt, false);
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* check to make sure the allele is an acceptable allele
|
||||
* @param allele the allele to check
|
||||
* @param isRef are we the reference allele?
|
||||
* @return true if the allele is fine, false otherwise
|
||||
*/
|
||||
private boolean checkAllele(String allele,boolean isRef) {
|
||||
if (allele.contains("<")) {
|
||||
Utils.warnUser("We are currently unable to parse out CNV encodings in VCF, we saw the following allele = " + allele);
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
// check for VCF3.3 insertions or deletions
|
||||
if (this.version != VCFHeaderVersion.VCF4_0) {
|
||||
if ((allele.toUpperCase().charAt(0) == 'D') || (allele.toUpperCase().charAt(0) == 'D'))
|
||||
throw new VCFParserException("Insertions/Deletions are not supported when reading 3.x VCF's. Please" +
|
||||
" convert your file to VCF 4.0 using VCFTools, available at http://vcftools.sourceforge.net/index.html");
|
||||
}
|
||||
|
||||
if ( ! Allele.acceptableAlleleBases(allele,isRef) )
|
||||
throw new VCFParserException("Unparsable vcf record with allele " + allele);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* parse a single allele, given the allele list
|
||||
* @param alleles the alleles available
|
||||
* @param alt the allele to parse
|
||||
* @param isRef are we the reference allele?
|
||||
*/
|
||||
private void parseSingleAllele(List<Allele> alleles, String alt, boolean isRef) {
|
||||
if (!checkAllele(alt,isRef))
|
||||
throw new VCFParserException("Unable to parse out correct alt allele, we saw = " + alt);
|
||||
|
||||
Allele allele = Allele.create(alt, false);
|
||||
if ( ! allele.isNoCall() )
|
||||
alleles.add(allele);
|
||||
}
|
||||
|
||||
/**
|
||||
* parse the filter string, first checking to see if we already have parsed it in a previous attempt
|
||||
* @param filterString the string to parse
|
||||
* @return a set of the filters applied
|
||||
*/
|
||||
private Set<String> parseFilters(String filterString) {
|
||||
|
||||
// null for unfiltered
|
||||
if ( filterString.equals(VCFConstants.UNFILTERED) )
|
||||
return null;
|
||||
|
||||
// empty set for passes filters
|
||||
LinkedHashSet<String> fFields = new LinkedHashSet<String>();
|
||||
|
||||
if ( this.version == VCFHeaderVersion.VCF4_0 ) {
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v4) )
|
||||
return fFields;
|
||||
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
|
||||
throw new StingException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4.0");
|
||||
} else if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) ) {
|
||||
return fFields;
|
||||
}
|
||||
|
||||
// do we have the filter string cached?
|
||||
if ( filterHash.containsKey(filterString) )
|
||||
return filterHash.get(filterString);
|
||||
|
||||
// otherwise we have to parse and cache the value
|
||||
if ( filterString.indexOf(VCFConstants.FILTER_CODE_SEPARATOR) == -1 )
|
||||
fFields.add(filterString);
|
||||
else
|
||||
fFields.addAll(Utils.split(filterString, VCFConstants.FILTER_CODE_SEPARATOR));
|
||||
|
||||
filterHash.put(filterString, fFields);
|
||||
|
||||
validateFields(fFields, filterFields);
|
||||
return fFields;
|
||||
}
|
||||
|
||||
/**
|
||||
* parse out the VCF line
|
||||
*
|
||||
* @param parts the parts split up
|
||||
* @param parseGenotypes whether to parse genotypes or not
|
||||
* @return a variant context object
|
||||
*/
|
||||
private VariantContext parseVCFLine(String[] parts, boolean parseGenotypes) {
|
||||
// try {
|
||||
// increment the line count
|
||||
lineNo++;
|
||||
|
||||
// parse out the required fields
|
||||
String contig = parts[0];
|
||||
long pos = Long.valueOf(parts[1]);
|
||||
String id = parts[2];
|
||||
String ref = parts[3].toUpperCase();
|
||||
String alts = parts[4].toUpperCase();
|
||||
Double qual = parseQual(parts[5]);
|
||||
String filter = parts[6];
|
||||
String info = parts[7];
|
||||
|
||||
// get our alleles, filters, and setup an attribute map
|
||||
List<Allele> alleles = parseAlleles(ref, alts);
|
||||
Set<String> filters = parseFilters(filter);
|
||||
Map<String, Object> attributes = parseInfo(info, id);
|
||||
|
||||
// find out our current location, and clip the alleles down to their minimum length
|
||||
Pair<GenomeLoc, List<Allele>> locAndAlleles;
|
||||
if ( !isSingleNucleotideEvent(alleles) ) {
|
||||
locAndAlleles = clipAlleles(contig, pos, ref, alleles);
|
||||
} else {
|
||||
locAndAlleles = new Pair<GenomeLoc, List<Allele>>(GenomeLocParser.createGenomeLoc(contig, pos), alleles);
|
||||
}
|
||||
|
||||
// a map to store our genotypes
|
||||
Map<String, Genotype> genotypes = null;
|
||||
|
||||
// do we have genotyping data
|
||||
if (parts.length > 8 && parseGenotypes) {
|
||||
genotypes = createGenotypeMap(parts, locAndAlleles, 8);
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContext(name, locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes);
|
||||
|
||||
// Trim bases of all alleles if necessary
|
||||
return VariantContextUtils.createVariantContextWithTrimmedAlleles(vc);
|
||||
}
|
||||
|
||||
private boolean isSingleNucleotideEvent(List<Allele> alleles) {
|
||||
for ( Allele a : alleles ) {
|
||||
if ( a.length() > 1 )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
class VCFParserException extends StingException {
|
||||
public VCFParserException(String msg) {
|
||||
super("Line " + lineNo + " generated parser exception " + msg);
|
||||
}
|
||||
|
||||
public VCFParserException(String msg, Throwable throwable) {
|
||||
super("Line " + lineNo + " generated parser exception " + msg, throwable);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype map
|
||||
* @param parts the string parts
|
||||
* @param locAndAlleles the locations and the list of alleles
|
||||
* @param formatFieldLocation the position in the parts array that the genotype strings start
|
||||
* @return a mapping of sample name to genotype object
|
||||
*/
|
||||
protected Map<String, Genotype> createGenotypeMap(String[] parts, Pair<GenomeLoc, List<Allele>> locAndAlleles, int formatFieldLocation) {
|
||||
Map<String, Genotype> genotypes = new LinkedHashMap<String, Genotype>(Math.max(parts.length - formatFieldLocation, 1));
|
||||
|
||||
// get the format keys
|
||||
int nGTKeys = ParsingUtils.split(parts[formatFieldLocation], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR.charAt(0));
|
||||
|
||||
// cycle through the sample names
|
||||
Iterator<String> sampleNameIterator = header.getGenotypeSamples().iterator();
|
||||
|
||||
// clear out our allele mapping
|
||||
alleleMap.clear();
|
||||
|
||||
// cycle through the genotype strings
|
||||
for (int genotypeOffset = formatFieldLocation + 1; genotypeOffset < parts.length; genotypeOffset++) {
|
||||
int GTValueSplitSize = ParsingUtils.split(parts[genotypeOffset], GTValueArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR.charAt(0));
|
||||
|
||||
double GTQual = VariantContext.NO_NEG_LOG_10PERROR;
|
||||
Set<String> genotypeFilters = null;
|
||||
Map<String, String> gtAttributes = null;
|
||||
String sampleName = sampleNameIterator.next();
|
||||
|
||||
// check to see if the value list is longer than the key list, which is a problem
|
||||
if (nGTKeys < GTValueSplitSize)
|
||||
throw new VCFParserException("Too few keys for compared to the value string " + sampleName + ", keys = " + parts[8] + " values = " + parts[genotypeOffset]);
|
||||
|
||||
int genotypeAlleleLocation = -1;
|
||||
if (nGTKeys >= 1) {
|
||||
gtAttributes = new HashMap<String, String>(nGTKeys - 1);
|
||||
for (int i = 0; i < nGTKeys; i++) {
|
||||
if (i >= GTValueSplitSize) {
|
||||
if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_QUALITY_KEY))
|
||||
GTQual = parseQual(VCFConstants.MISSING_VALUE_v4);
|
||||
else if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_FILTER_KEY))
|
||||
genotypeFilters = parseFilters(VCFConstants.MISSING_VALUE_v4);
|
||||
else
|
||||
gtAttributes.put(genotypeKeyArray[i],VCFConstants.MISSING_VALUE_v4);
|
||||
}
|
||||
else if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_KEY))
|
||||
if (i != 0)
|
||||
throw new VCFParserException("Saw GT at position " + i + ", it must be at the first position for genotypes. At location = " + locAndAlleles.first);
|
||||
else
|
||||
genotypeAlleleLocation = i;
|
||||
else if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_QUALITY_KEY))
|
||||
GTQual = parseQual(GTValueArray[i]);
|
||||
else if (genotypeKeyArray[i].equals(VCFConstants.GENOTYPE_FILTER_KEY))
|
||||
genotypeFilters = parseFilters(GTValueArray[i]);
|
||||
else {
|
||||
if (this.version != VCFHeaderVersion.VCF4_0 && GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3))
|
||||
GTValueArray[i] = VCFConstants.MISSING_VALUE_v4;
|
||||
gtAttributes.put(genotypeKeyArray[i], GTValueArray[i]);
|
||||
}
|
||||
}
|
||||
// validate the format fields
|
||||
validateFields(gtAttributes.keySet(), new ArrayList<String>(formatFields.keySet()));
|
||||
}
|
||||
// check to make sure we found a gentoype field
|
||||
if (genotypeAlleleLocation < 0) throw new VCFParserException("Unable to find required field GT for record " + locAndAlleles.first);
|
||||
|
||||
// assuming allele list length in the single digits, could be bad. Check for > 1 for haploid genotypes
|
||||
boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|';
|
||||
|
||||
// add it to the list
|
||||
genotypes.put(sampleName, new Genotype(sampleName,
|
||||
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], locAndAlleles.second, alleleMap),
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
gtAttributes,
|
||||
phased));
|
||||
|
||||
}
|
||||
return genotypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* clip the alleles, based on the reference
|
||||
*
|
||||
* @param contig our contig position
|
||||
* @param position the unadjusted start position (pre-clipping)
|
||||
* @param ref the reference string
|
||||
* @param unclippedAlleles the list of unclipped alleles
|
||||
* @return a list of alleles, clipped to the reference
|
||||
*/
|
||||
static Pair<GenomeLoc,List<Allele>> clipAlleles(String contig, long position, String ref, List<Allele> unclippedAlleles) {
|
||||
List<Allele> newAlleleList = new ArrayList<Allele>();
|
||||
|
||||
// find the preceeding string common to all alleles and the reference
|
||||
boolean clipping = true;
|
||||
for (Allele a : unclippedAlleles)
|
||||
if (a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0])) {
|
||||
clipping = false;
|
||||
}
|
||||
int forwardClipping = (clipping) ? 1 : 0;
|
||||
|
||||
int reverseClipped = 0;
|
||||
clipping = true;
|
||||
while (clipping) {
|
||||
for (Allele a : unclippedAlleles)
|
||||
if (a.length() - reverseClipped <= forwardClipping || a.length() - forwardClipping == 0)
|
||||
clipping = false;
|
||||
else if (a.getBases()[a.length()-reverseClipped-1] != ref.getBytes()[ref.length()-reverseClipped-1])
|
||||
clipping = false;
|
||||
if (clipping) reverseClipped++;
|
||||
}
|
||||
|
||||
for (Allele a : unclippedAlleles)
|
||||
newAlleleList.add(Allele.create(Arrays.copyOfRange(a.getBases(),forwardClipping,a.getBases().length-reverseClipped),a.isReference()));
|
||||
|
||||
// the new reference length
|
||||
int refLength = ref.length() - reverseClipped;
|
||||
|
||||
return new Pair<GenomeLoc,List<Allele>>(GenomeLocParser.createGenomeLoc(contig,position,(position+Math.max(refLength - 1,0))),
|
||||
newAlleleList);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return the type of record
|
||||
*/
|
||||
@Override
|
||||
public Class getFeatureType() {
|
||||
return VariantContext.class;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the name of this codec
|
||||
* @return our set name
|
||||
*/
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the name of this codec
|
||||
* @param name new name
|
||||
*/
|
||||
public void setName(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public static interface LineTransform {
|
||||
public String lineTransform(String line);
|
||||
}
|
||||
|
||||
public LineTransform getTransformer() {
|
||||
return transformer;
|
||||
}
|
||||
|
||||
public void setTransformer(LineTransform transformer) {
|
||||
this.transformer = transformer;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -55,7 +55,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
|
|||
// All known ROD types
|
||||
Types.put("GELI", rodGELI.class);
|
||||
Types.put("Table", TabularROD.class);
|
||||
Types.put("HapMap", HapMapROD.class);
|
||||
Types.put("Intervals", IntervalRod.class);
|
||||
Types.put("Plink", PlinkRod.class);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ import org.broad.tribble.index.linear.LinearIndexCreator;
|
|||
import org.broad.tribble.source.BasicFeatureSource;
|
||||
import org.broad.tribble.util.LittleEndianInputStream;
|
||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||
import org.broad.tribble.vcf.NameAwareCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.TribbleTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
||||
|
|
@ -70,6 +69,9 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
// what index to use
|
||||
static boolean useLinearIndex = true;
|
||||
|
||||
// our bin size
|
||||
static int binSize = 1600;
|
||||
|
||||
// the linear index extension
|
||||
public static final String indexExtension = ".idx";
|
||||
|
||||
|
|
@ -239,11 +241,11 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
Index index = IndexFactory.loadIndex(indexFile.getAbsolutePath());
|
||||
|
||||
// check if the file is up-to date (filestamp and version check)
|
||||
if (index.isCurrentVersion() && indexFile.lastModified() > inputFile.lastModified())
|
||||
if (/*index.isCurrentVersion() && */ indexFile.lastModified() > inputFile.lastModified())
|
||||
return index;
|
||||
else if (indexFile.lastModified() < inputFile.lastModified())
|
||||
logger.warn("Index file " + indexFile + " is out of date (index older than input file), deleting and updating the index file");
|
||||
else // we've loaded an old version of the index, we want to remove it
|
||||
else // we've loaded an old version of the index, we want to remove it <-- currently not used, but may re-enable
|
||||
logger.warn("Index file " + indexFile + " is out of date (old version), deleting and updating the index file");
|
||||
|
||||
// however we got here, remove the index and return null
|
||||
|
|
@ -300,10 +302,13 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
// this can take a while, let them know what we're doing
|
||||
logger.info("Creating Tribble index in memory for file " + inputFile);
|
||||
IndexCreator creator;
|
||||
if (useLinearIndex)
|
||||
if (useLinearIndex) {
|
||||
creator = new LinearIndexCreator(inputFile,codec,null);
|
||||
else
|
||||
((LinearIndexCreator)creator).setBinWidth(binSize);
|
||||
} else {
|
||||
creator = new IntervalIndexCreator(inputFile, codec, null);
|
||||
((IntervalIndexCreator)creator).setFeaturesPerInterval(binSize);
|
||||
}
|
||||
return creator.createIndex();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.refdata.utils;
|
|||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.iterators.CloseableTribbleIterator;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -38,10 +39,10 @@ import java.util.Iterator;
|
|||
* a wrapper on Tribble feature iterators so that they produce GATKFeatures (which produce GenomeLocs)
|
||||
*/
|
||||
public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeature> {
|
||||
private final CloseableIterator<Feature> iterator;
|
||||
private final CloseableTribbleIterator<Feature> iterator;
|
||||
private final String name;
|
||||
|
||||
public FeatureToGATKFeatureIterator(CloseableIterator<Feature> iter, String name) {
|
||||
public FeatureToGATKFeatureIterator(CloseableTribbleIterator<Feature> iter, String name) {
|
||||
this.name = name;
|
||||
this.iterator = iter;
|
||||
}
|
||||
|
|
@ -63,6 +64,6 @@ public class FeatureToGATKFeatureIterator implements CloseableIterator<GATKFeatu
|
|||
|
||||
@Override
|
||||
public void close() {
|
||||
this.iterator.close();
|
||||
// we don't close them anymore
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,12 +26,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.hapmap.HapMapFeature;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
|
|
@ -117,8 +118,8 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
throw new IllegalStateException("No rod data is present");
|
||||
|
||||
Object rod = rods.get(0);
|
||||
if ( rod instanceof HapMapROD )
|
||||
samples.addAll(Arrays.asList(((HapMapROD)rod).getSampleIDs()));
|
||||
if ( rod instanceof HapMapFeature)
|
||||
samples.addAll(Arrays.asList(((HapMapFeature)rod).getSampleIDs()));
|
||||
else
|
||||
samples.addAll(vc.getSampleNames());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.TabularROD;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
|
|||
|
|
@ -25,11 +25,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import java.util.Map;
|
||||
|
||||
|
|
|
|||
|
|
@ -25,12 +25,13 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFFormatHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeaderLineType;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
|
|||
|
|
@ -25,13 +25,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.vcf.VCFCompoundHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -222,7 +223,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
vcfWriter.add(annotatedVC, ref.getBase());
|
||||
} else {
|
||||
// check to see if the buffered context is different (in location) this context
|
||||
if ( indelBufferContext != null && ! indelBufferContext.iterator().next().getLocation().equals(annotatedVCs.iterator().next().getLocation()) ) {
|
||||
if ( indelBufferContext != null && ! VariantContextUtils.getLocation(indelBufferContext.iterator().next()).equals(VariantContextUtils.getLocation(annotatedVCs.iterator().next())) ) {
|
||||
for ( VariantContext annotatedVC : indelBufferContext )
|
||||
vcfWriter.add(annotatedVC, ref.getBase());
|
||||
indelBufferContext = annotatedVCs;
|
||||
|
|
|
|||
|
|
@ -38,12 +38,12 @@ import java.util.Set;
|
|||
import java.util.Map.Entry;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
|
|
@ -252,7 +252,7 @@ public class VariantAnnotatorEngine {
|
|||
//Create a separate VariantContext (aka. output line) for each element in infoAnnotationOutputsList
|
||||
Collection<VariantContext> returnValue = new LinkedList<VariantContext>();
|
||||
for(Map<String, Object> infoAnnotationOutput : infoAnnotationOutputsList) {
|
||||
returnValue.add( new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, infoAnnotationOutput) );
|
||||
returnValue.add( new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), vc.filtersWereApplied() ? vc.getFilters() : null, infoAnnotationOutput) );
|
||||
}
|
||||
|
||||
return returnValue;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFFormatHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
|
|
|
|||
|
|
@ -25,9 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
public class ClusteredSnps {
|
||||
|
|
@ -27,7 +28,7 @@ public class ClusteredSnps {
|
|||
continue;
|
||||
|
||||
// find the nth variant
|
||||
GenomeLoc left = variants[i].getVariantContext().getLocation();
|
||||
GenomeLoc left = VariantContextUtils.getLocation(variants[i].getVariantContext());
|
||||
GenomeLoc right = null;
|
||||
int snpsSeen = 1;
|
||||
|
||||
|
|
@ -35,7 +36,7 @@ public class ClusteredSnps {
|
|||
while ( ++currentIndex < variants.length ) {
|
||||
if ( variants[currentIndex] != null && variants[currentIndex].getVariantContext() != null && variants[currentIndex].getVariantContext().isVariant() ) {
|
||||
if ( ++snpsSeen == snpThreshold ) {
|
||||
right = variants[currentIndex].getVariantContext().getLocation();
|
||||
right = VariantContextUtils.getLocation(variants[currentIndex].getVariantContext());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -25,11 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.filters;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -208,7 +208,7 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
|||
filters.add(exp.name);
|
||||
}
|
||||
|
||||
VariantContext filteredVC = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
VariantContext filteredVC = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), genotypes, vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
|
||||
writeVCF(filteredVC, context.getReferenceContext().getBase());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,10 +24,10 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
|
|
|
|||
|
|
@ -24,11 +24,11 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.io.PrintWriter;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
|
@ -32,8 +34,6 @@ import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|||
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -410,7 +412,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
attributes.put("SB", Double.valueOf(strandScore));
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContext("UG_SNP_call", loc, alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes);
|
||||
VariantContext vc = new VariantContext("UG_SNP_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, phredScaledConfidence/10.0, passesCallThreshold(phredScaledConfidence) ? null : filter, attributes);
|
||||
|
||||
return new VariantCallContext(vc, passesCallThreshold(phredScaledConfidence));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -72,7 +74,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
|
|||
throw new StingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent);
|
||||
}
|
||||
|
||||
VariantContext vc = new VariantContext("UG_Indel_call", loc, alleles, new HashMap<String,Genotype>() /* genotypes */,
|
||||
VariantContext vc = new VariantContext("UG_Indel_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles, new HashMap<String, Genotype>() /* genotypes */,
|
||||
-1.0 /* log error */, null /* filters */, null /* attributes */);
|
||||
|
||||
vcc = new VariantCallContext(vc,true);
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias"));
|
||||
|
||||
// FORMAT and INFO fields
|
||||
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings(VCFHeaderVersion.VCF3_3));
|
||||
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
|
||||
|
||||
// FILTER fields
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ||
|
||||
|
|
|
|||
|
|
@ -26,12 +26,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
|
|||
|
|
@ -27,9 +27,9 @@ package org.broadinstitute.sting.gatk.walkers.indels;
|
|||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
|
|
@ -612,7 +612,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
if ( knownIndel == null || !knownIndel.isIndel() )
|
||||
continue;
|
||||
byte[] indelStr = knownIndel.isInsertion() ? knownIndel.getAlternateAllele(0).getBases() : Utils.dupBytes((byte)'-', knownIndel.getReference().length());
|
||||
int start = (int)(knownIndel.getLocation().getStart() - leftmostIndex) + 1;
|
||||
int start = (int)(knownIndel.getStart() - leftmostIndex) + 1;
|
||||
Consensus c = createAlternateConsensus(start, reference, indelStr, knownIndel.isDeletion());
|
||||
if ( c != null )
|
||||
altConsensesToPopulate.add(c);
|
||||
|
|
|
|||
|
|
@ -25,9 +25,10 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.BadMateFilter;
|
||||
|
|
@ -123,7 +124,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
|||
break;
|
||||
}
|
||||
if ( hasIndel )
|
||||
furthestStopPos = vc.getLocation().getStop();
|
||||
furthestStopPos = vc.getEnd();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,9 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -35,13 +35,11 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.classloader.PackageUtils;
|
||||
import org.broadinstitute.sting.utils.collections.NestedHashMap;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
||||
|
|
|
|||
|
|
@ -27,10 +27,11 @@ package org.broadinstitute.sting.gatk.walkers.sequenom;
|
|||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.dbsnp.DbSNPCodec;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
|
|
@ -111,9 +112,9 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
// we don't want to see the same multi-base deletion multiple times
|
||||
if ( positionOfLastVariant != null &&
|
||||
positionOfLastVariant.size() > 1 &&
|
||||
positionOfLastVariant.equals(vc.getLocation()) )
|
||||
positionOfLastVariant.equals(VariantContextUtils.getLocation(vc)) )
|
||||
return "";
|
||||
positionOfLastVariant = vc.getLocation();
|
||||
positionOfLastVariant = VariantContextUtils.getLocation(vc);
|
||||
|
||||
String contig = context.getLocation().getContig();
|
||||
long offset = context.getLocation().getStart();
|
||||
|
|
|
|||
|
|
@ -25,11 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.sequenom;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.PlinkRod;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
||||
@Analysis(name = "Count Functional Classes", description = "Counts instances of different functional variant classes (provided the variants are annotated with that information)")
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
|
|
@ -58,7 +59,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
|
|||
public MetricsAtAC(int ac) { this.ac = ac; }
|
||||
|
||||
public void update(VariantContext eval) {
|
||||
if ( eval.isTransition() )
|
||||
if ( VariantContextUtils.isTransition(eval) )
|
||||
nTi++;
|
||||
else
|
||||
nTv++;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
|
||||
|
|
@ -41,7 +42,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
|
||||
public void updateTiTv(VariantContext vc, boolean updateStandard) {
|
||||
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
|
||||
if (vc.isTransition()) {
|
||||
if (VariantContextUtils.isTransition(vc)) {
|
||||
if (updateStandard) nTiInComp++;
|
||||
else nTi++;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -26,10 +26,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.util.variantcontext.MutableVariantContext;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
|
|
|
|||
|
|
@ -25,10 +25,11 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
|
|
@ -233,7 +234,7 @@ public class VariantQualityScore extends VariantEvaluator {
|
|||
|
||||
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||
titvStats.incrValue(eval.getPhredScaledQual(), eval.isTransition());
|
||||
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||
|
||||
if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
|
||||
int alternateAlleleCount = 0;
|
||||
|
|
|
|||
|
|
@ -25,10 +25,10 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
|
@ -194,7 +194,7 @@ public class ApplyVariantCuts extends RodWalker<Integer, Integer> {
|
|||
if ( !filterString.equals(VCFConstants.PASSES_FILTERS_v4) ) {
|
||||
Set<String> filters = new HashSet<String>();
|
||||
filters.add(filterString);
|
||||
vc = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
vc = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), filters, vc.getAttributes());
|
||||
}
|
||||
}
|
||||
vcfWriter.add( vc, ref.getBase() );
|
||||
|
|
|
|||
|
|
@ -26,9 +26,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
|
|
@ -41,7 +42,6 @@ import org.broadinstitute.sting.utils.Utils;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -164,7 +164,7 @@ public class GenerateVariantClustersWalker extends RodWalker<ExpandingArrayList<
|
|||
|
||||
final VariantDatum variantDatum = new VariantDatum();
|
||||
variantDatum.annotations = annotationValues;
|
||||
variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
||||
variantDatum.isTransition = VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
||||
variantDatum.alleleCount = vc.getChromosomeCount(vc.getAlternateAllele(0)); // BUGBUG: assumes file has genotypes
|
||||
if( variantDatum.alleleCount > maxAC ) {
|
||||
maxAC = variantDatum.alleleCount;
|
||||
|
|
|
|||
|
|
@ -26,7 +26,8 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
|
@ -522,7 +523,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
|
|||
value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) );
|
||||
} catch( Exception e ) {
|
||||
throw new StingException("No double value detected for annotation = " + annotationKey +
|
||||
" in variant at " + vc.getLocation() + ", reported annotation value = " + vc.getAttribute( annotationKey ) );
|
||||
" in variant at " + VariantContextUtils.getLocation(vc) + ", reported annotation value = " + vc.getAttribute( annotationKey ) );
|
||||
}
|
||||
}
|
||||
return value;
|
||||
|
|
|
|||
|
|
@ -26,10 +26,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
|
|
@ -180,7 +181,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
if( vc != null && !vc.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) && vc.isSNP() ) {
|
||||
if( !vc.isFiltered() || IGNORE_ALL_INPUT_FILTERS || (ignoreInputFilterSet != null && ignoreInputFilterSet.containsAll(vc.getFilters())) ) {
|
||||
final VariantDatum variantDatum = new VariantDatum();
|
||||
variantDatum.isTransition = vc.getSNPSubstitutionType().compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
||||
variantDatum.isTransition = VariantContextUtils.getSNPSubstitutionType(vc).compareTo(BaseUtils.BaseSubstitutionType.TRANSITION) == 0;
|
||||
|
||||
final DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME));
|
||||
variantDatum.isKnown = dbsnp != null;
|
||||
|
|
@ -213,7 +214,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
attrs.put("OQ", String.format("%.2f", ((Double)vc.getPhredScaledQual())));
|
||||
Set<String> filters = new HashSet<String>();
|
||||
filters.add(VCFConstants.PASSES_FILTERS_v4);
|
||||
VariantContext newVC = new VariantContext(vc.getName(), vc.getLocation(), vc.getAlleles(), vc.getGenotypes(), variantDatum.qual / 10.0, filters, attrs);
|
||||
VariantContext newVC = new VariantContext(vc.getName(), vc.getChr(), vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), variantDatum.qual / 10.0, filters, attrs);
|
||||
|
||||
vcfWriter.add( newVC, ref.getBase() );
|
||||
|
||||
|
|
|
|||
|
|
@ -25,10 +25,10 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
|
@ -32,7 +33,6 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -24,9 +24,10 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -34,7 +35,6 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
|
|
|
|||
|
|
@ -25,10 +25,10 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType;
|
||||
|
|
|
|||
|
|
@ -25,20 +25,15 @@
|
|||
|
||||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RMD;
|
||||
import org.broadinstitute.sting.gatk.walkers.Requires;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
|
|
@ -138,7 +133,7 @@ public class BeagleOutputByDepthWalker extends RodWalker<Integer, Integer> {
|
|||
Genotype compGenotype = compGenotypes.get(sample);
|
||||
|
||||
|
||||
outputWriter.format("%d %d %d %d %d ", vc_postbgl.getLocation().getStart(), alleleCountH, chrCountH,
|
||||
outputWriter.format("%d %d %d %d %d ", vc_postbgl.getStart(), alleleCountH, chrCountH,
|
||||
alleleCountEmp, chrCountEmp);
|
||||
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue