No more new Allele() -- must use create. Allelel simple alleles are now cached for efficiency reasons. VCF4 codec optimizations -- 4x performance in general. Now working in general but hooked up to the ROD system now as VCF4. WARNING -- does not actually work with indels, genotype filters, etc.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3489 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
ef47a69c50
commit
3ea506fe52
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
|
@ -87,20 +88,9 @@ public class Allele implements Comparable<Allele> {
|
|||
public final static String NULL_ALLELE_STRING = "-";
|
||||
public final static String NO_CALL_STRING = ".";
|
||||
/** A generic static NO_CALL allele for use */
|
||||
public final static Allele NO_CALL = new Allele(NO_CALL_STRING);
|
||||
|
||||
/**
|
||||
* Create a new Allele that includes bases and if tagged as the reference allele if isRef == true. If bases
|
||||
* == '-', a Null allele is created. If bases == '.', a no call Allele is created.
|
||||
*
|
||||
* @param bases the DNA sequence of this variation, '-', of '.'
|
||||
* @param isRef should we make this a reference allele?
|
||||
* @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformated
|
||||
*/
|
||||
public Allele(byte[] bases, boolean isRef) {
|
||||
if ( bases == null )
|
||||
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
|
||||
|
||||
// no public way to create an allele
|
||||
private Allele(byte[] bases, boolean isRef) {
|
||||
// standardize our representation of null allele and bases
|
||||
if ( wouldBeNullAllele(bases) ) {
|
||||
bases = EMPTY_ALLELE_BASES;
|
||||
|
|
@ -110,8 +100,8 @@ public class Allele implements Comparable<Allele> {
|
|||
isNoCall = true;
|
||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
||||
}
|
||||
else
|
||||
bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
|
||||
// else
|
||||
// bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
|
||||
|
||||
this.isRef = isRef;
|
||||
this.bases = bases;
|
||||
|
|
@ -120,9 +110,62 @@ public class Allele implements Comparable<Allele> {
|
|||
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
|
||||
}
|
||||
|
||||
private Allele(String bases, boolean isRef) {
|
||||
this(bases.getBytes(), isRef);
|
||||
}
|
||||
|
||||
public Allele(byte base, boolean isRef) {
|
||||
this( new byte[]{ base }, isRef);
|
||||
|
||||
private final static Allele REF_A = new Allele("A", true);
|
||||
private final static Allele ALT_A = new Allele("A", false);
|
||||
private final static Allele REF_C = new Allele("C", true);
|
||||
private final static Allele ALT_C = new Allele("C", false);
|
||||
private final static Allele REF_G = new Allele("G", true);
|
||||
private final static Allele ALT_G = new Allele("G", false);
|
||||
private final static Allele REF_T = new Allele("T", true);
|
||||
private final static Allele ALT_T = new Allele("T", false);
|
||||
private final static Allele REF_NULL = new Allele("-", true);
|
||||
private final static Allele ALT_NULL = new Allele("-", false);
|
||||
public final static Allele NO_CALL = new Allele(NO_CALL_STRING, false);
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// creation routines
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Create a new Allele that includes bases and if tagged as the reference allele if isRef == true. If bases
|
||||
* == '-', a Null allele is created. If bases == '.', a no call Allele is created.
|
||||
*
|
||||
* @param bases the DNA sequence of this variation, '-', of '.'
|
||||
* @param isRef should we make this a reference allele?
|
||||
* @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformated
|
||||
*/
|
||||
public static Allele create(byte[] bases, boolean isRef) {
|
||||
if ( bases == null )
|
||||
throw new IllegalArgumentException("create: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
|
||||
|
||||
if ( bases.length == 1 ) {
|
||||
// optimization to return a static constant Allele for each single base object
|
||||
switch (bases[0]) {
|
||||
case '.':
|
||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
||||
return NO_CALL;
|
||||
case '-': return isRef ? REF_NULL : ALT_NULL;
|
||||
case 'A': return isRef ? REF_A : ALT_A;
|
||||
case 'C': return isRef ? REF_C : ALT_C;
|
||||
case 'G': return isRef ? REF_G : ALT_G;
|
||||
case 'T': return isRef ? REF_T : ALT_T;
|
||||
default: throw new IllegalArgumentException("Illegal lower-case base: " + (char)bases[0]);
|
||||
}
|
||||
} else {
|
||||
return new Allele(bases, isRef);
|
||||
}
|
||||
}
|
||||
|
||||
public static Allele create(byte base, boolean isRef) {
|
||||
// public Allele(byte base, boolean isRef) {
|
||||
return create( new byte[]{ base }, isRef);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -157,10 +200,14 @@ public class Allele implements Comparable<Allele> {
|
|||
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) )
|
||||
return true;
|
||||
|
||||
for ( byte b : bases ) {
|
||||
if ( ! BaseUtils.isRegularBase(b) ) {
|
||||
return false;
|
||||
for ( int i = 0; i < bases.length; i++ ) {
|
||||
switch (bases[i]) {
|
||||
case 'A': case 'C': case 'G': case 'T': break;
|
||||
default: return false;
|
||||
}
|
||||
// if ( ! BaseUtils.isRegularBase(bases[i]) ) {
|
||||
// return false;
|
||||
// }
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -172,8 +219,9 @@ public class Allele implements Comparable<Allele> {
|
|||
* @param bases bases representing an allele
|
||||
* @param isRef is this the reference allele?
|
||||
*/
|
||||
public Allele(String bases, boolean isRef) {
|
||||
this(bases.getBytes(), isRef);
|
||||
public static Allele create(String bases, boolean isRef) {
|
||||
//public Allele(String bases, boolean isRef) {
|
||||
return create(bases.getBytes(), isRef);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -182,14 +230,19 @@ public class Allele implements Comparable<Allele> {
|
|||
*
|
||||
* @param bases bases representing an allele
|
||||
*/
|
||||
public Allele(String bases) { this(bases, false); }
|
||||
public static Allele create(String bases) {
|
||||
return create(bases, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a non-Ref allele. @see Allele(byte[], boolean) for full information
|
||||
*
|
||||
* @param bases bases representing an allele
|
||||
*/
|
||||
public Allele(byte[] bases) { this(bases, false); }
|
||||
public static Allele create(byte[] bases) {
|
||||
return create(bases, false);
|
||||
//this(bases, false);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -253,7 +306,7 @@ public class Allele implements Comparable<Allele> {
|
|||
* @return true if this and other are equal
|
||||
*/
|
||||
public boolean equals(Allele other, boolean ignoreRefState) {
|
||||
return (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && basesMatch(other.getBases());
|
||||
return this == other || (isRef == other.isRef || ignoreRefState) && isNull == other.isNull && isNoCall == other.isNoCall && basesMatch(other.getBases());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -315,7 +368,7 @@ public class Allele implements Comparable<Allele> {
|
|||
|
||||
if ( allele == null ) {
|
||||
if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) {
|
||||
allele = new Allele(alleleString);
|
||||
allele = create(alleleString);
|
||||
} else {
|
||||
throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles " + possibleAlleles);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -182,6 +182,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
protected final static Map<String, Genotype> NO_GENOTYPES = Collections.unmodifiableMap(new HashMap<String, Genotype>());
|
||||
|
||||
// a fast cached access point to the ref / alt alleles for biallelic case
|
||||
private Allele REF = null;
|
||||
|
||||
// set to the alt allele when biallelic, otherwise == null
|
||||
private Allele ALT = null;
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
|
|
@ -205,7 +211,16 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
if ( genotypes == null ) { genotypes = NO_GENOTYPES; }
|
||||
this.genotypes = Collections.unmodifiableMap(genotypes);
|
||||
calculateGenotypeCounts();
|
||||
|
||||
// cache the REF and ALT alleles
|
||||
int nAlleles = alleles.size();
|
||||
for ( Allele a : alleles ) {
|
||||
if ( a.isReference() ) {
|
||||
REF = a;
|
||||
} else if ( nAlleles == 2 ) { // only cache ALT when biallelic
|
||||
ALT = a;
|
||||
}
|
||||
}
|
||||
|
||||
validate();
|
||||
}
|
||||
|
|
@ -371,6 +386,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
public enum Type {
|
||||
NO_VARIATION,
|
||||
SNP,
|
||||
MNP, // a multi-nucleotide polymorphism
|
||||
INDEL,
|
||||
MIXED,
|
||||
}
|
||||
|
|
@ -492,19 +508,23 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return the reference allele for this context
|
||||
*/
|
||||
public Allele getReference() {
|
||||
Allele ref = getReferenceWithoutError();
|
||||
Allele ref = REF;
|
||||
if ( ref == null )
|
||||
throw new StingException("BUG: no reference allele found at " + this);
|
||||
return ref;
|
||||
}
|
||||
|
||||
/** Private helper routine that grabs the reference allele but doesn't throw an error if there's no such allele */
|
||||
private Allele getReferenceWithoutError() {
|
||||
for ( Allele allele : getAlleles() )
|
||||
if ( allele.isReference() )
|
||||
return allele;
|
||||
return null;
|
||||
}
|
||||
|
||||
// private Allele getReferenceWithoutError() {
|
||||
// for ( Allele allele : getAlleles() ) {
|
||||
// if ( allele.isReference() ) {
|
||||
// return allele;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return null;
|
||||
// }
|
||||
|
||||
/**
|
||||
* @return true if the context is strictly bi-allelic
|
||||
|
|
@ -542,6 +562,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
}
|
||||
|
||||
public boolean hasAllele(Allele allele, boolean ignoreRefState) {
|
||||
if ( allele == REF || allele == ALT ) // optimization for cached cases
|
||||
return true;
|
||||
|
||||
for ( Allele a : getAlleles() ) {
|
||||
if ( a.equals(allele, ignoreRefState) )
|
||||
return true;
|
||||
|
|
@ -750,19 +773,21 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
}
|
||||
|
||||
private void calculateGenotypeCounts() {
|
||||
genotypeCounts = new int[Genotype.Type.values().length];
|
||||
if ( genotypeCounts == null ) {
|
||||
genotypeCounts = new int[Genotype.Type.values().length];
|
||||
|
||||
for ( Genotype g : getGenotypes().values() ) {
|
||||
if ( g.isNoCall() )
|
||||
genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++;
|
||||
else if ( g.isHomRef() )
|
||||
genotypeCounts[Genotype.Type.HOM_REF.ordinal()]++;
|
||||
else if ( g.isHet() )
|
||||
genotypeCounts[Genotype.Type.HET.ordinal()]++;
|
||||
else if ( g.isHomVar() )
|
||||
genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++;
|
||||
else
|
||||
throw new StingException("Genotype of unknown type: " + g);
|
||||
for ( Genotype g : getGenotypes().values() ) {
|
||||
if ( g.isNoCall() )
|
||||
genotypeCounts[Genotype.Type.NO_CALL.ordinal()]++;
|
||||
else if ( g.isHomRef() )
|
||||
genotypeCounts[Genotype.Type.HOM_REF.ordinal()]++;
|
||||
else if ( g.isHet() )
|
||||
genotypeCounts[Genotype.Type.HET.ordinal()]++;
|
||||
else if ( g.isHomVar() )
|
||||
genotypeCounts[Genotype.Type.HOM_VAR.ordinal()]++;
|
||||
else
|
||||
throw new StingException("Genotype of unknown type: " + g);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -772,6 +797,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return number of no calls
|
||||
*/
|
||||
public int getNoCallCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.NO_CALL.ordinal()];
|
||||
}
|
||||
|
||||
|
|
@ -781,6 +807,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return number of hom ref calls
|
||||
*/
|
||||
public int getHomRefCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.HOM_REF.ordinal()];
|
||||
}
|
||||
|
||||
|
|
@ -790,6 +817,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return number of het calls
|
||||
*/
|
||||
public int getHetCount() {
|
||||
calculateGenotypeCounts();
|
||||
return genotypeCounts[Genotype.Type.HET.ordinal()];
|
||||
}
|
||||
|
||||
|
|
@ -811,9 +839,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
/**
|
||||
* To be called by any modifying routines
|
||||
*/
|
||||
//private void invalidate() { validatedP = false; }
|
||||
|
||||
public boolean validate() {
|
||||
private boolean validate() {
|
||||
return validate(true);
|
||||
}
|
||||
|
||||
|
|
@ -888,70 +914,40 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public static VariantContext simpleMerge(Set<VariantContext> VCs) {
|
||||
if ( VCs == null || VCs.size() == 0 )
|
||||
return null;
|
||||
|
||||
Iterator<VariantContext> iter = VCs.iterator();
|
||||
|
||||
// establish the baseline info from the first VC
|
||||
VariantContext first = iter.next();
|
||||
String name = first.getName();
|
||||
GenomeLoc loc = first.getLocation();
|
||||
Set<Allele> alleles = new HashSet<Allele>(first.getAlleles());
|
||||
Map<String, Genotype> genotypes = new HashMap<String, Genotype>(first.getGenotypes());
|
||||
double negLog10PError = first.isVariant() ? first.getNegLog10PError() : -1;
|
||||
Set<String> filters = new HashSet<String>(first.getFilters());
|
||||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
int depth = 0;
|
||||
if ( first.hasAttribute(VCFRecord.DEPTH_KEY) )
|
||||
depth = Integer.valueOf(first.getAttribute(VCFRecord.DEPTH_KEY).toString());
|
||||
|
||||
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
||||
while ( iter.hasNext() ) {
|
||||
VariantContext vc = iter.next();
|
||||
if ( !loc.equals(vc.getLocation()) || !first.getReference().equals(vc.getReference()) )
|
||||
return null;
|
||||
|
||||
alleles.addAll(vc.getAlleles());
|
||||
genotypes.putAll(vc.getGenotypes());
|
||||
|
||||
negLog10PError = Math.max(negLog10PError, vc.isVariant() ? vc.getNegLog10PError() : -1);
|
||||
|
||||
filters.addAll(vc.getFilters());
|
||||
if ( vc.hasAttribute(VCFRecord.DEPTH_KEY) )
|
||||
depth += Integer.valueOf(vc.getAttribute(VCFRecord.DEPTH_KEY).toString());
|
||||
}
|
||||
|
||||
if ( depth > 0 )
|
||||
attributes.put(VCFRecord.DEPTH_KEY, String.valueOf(depth));
|
||||
return new VariantContext(name, loc, alleles, genotypes, negLog10PError, filters, attributes);
|
||||
}
|
||||
|
||||
private void determineType() {
|
||||
if ( type == null ) {
|
||||
if ( alleles.size() == 0 ) {
|
||||
throw new StingException("Unexpected requested type of VariantContext with no alleles!" + this);
|
||||
} else if ( alleles.size() == 1 ) {
|
||||
type = Type.NO_VARIATION;
|
||||
// note that this doesn't require a reference allele. You can be monomorphic independent of having a
|
||||
// reference allele
|
||||
} else if ( isSNPAllele(alleles) ) {
|
||||
type = Type.SNP;
|
||||
} else if ( isDIPAllele(alleles) ) {
|
||||
type = Type.INDEL;
|
||||
} else {
|
||||
type = Type.MIXED;
|
||||
switch ( getNAlleles() ) {
|
||||
case 0:
|
||||
throw new StingException("Unexpected requested type of VariantContext with no alleles!" + this);
|
||||
case 1:
|
||||
type = Type.NO_VARIATION;
|
||||
// note that this doesn't require a reference allele. You can be monomorphic independent of having a
|
||||
// reference allele
|
||||
break;
|
||||
default:
|
||||
if ( isMNPAllele(alleles, 1) ) {
|
||||
type = Type.SNP;
|
||||
} else if ( isMNPAllele(alleles, -1) ) {
|
||||
type = Type.MNP;
|
||||
} else if ( isDIPAllele(alleles) ) {
|
||||
type = Type.INDEL;
|
||||
} else {
|
||||
type = Type.MIXED;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean isSNPAllele(Set<Allele> alleles) {
|
||||
if ( alleles.size() < 2 )
|
||||
return false;
|
||||
private static boolean isMNPAllele(Set<Allele> alleles, int requiredLength ) { // requireLength == -1 if you don't care
|
||||
// if ( alleles.size() < 2 )
|
||||
// return false;
|
||||
|
||||
int l = requiredLength;
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.length() != 1 )
|
||||
if ( l == -1 ) // remember the length of the first allele
|
||||
l = allele.length();
|
||||
|
||||
if ( allele.length() != l )
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,9 @@ package org.broadinstitute.sting.gatk.contexts.variantcontext;
|
|||
import java.util.*;
|
||||
import org.apache.commons.jexl.*;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation;
|
||||
import org.broad.tribble.vcf.VCFRecord;
|
||||
|
||||
public class VariantContextUtils {
|
||||
/**
|
||||
|
|
@ -154,108 +156,50 @@ public class VariantContextUtils {
|
|||
}
|
||||
}
|
||||
|
||||
// private static final String UNIQUIFIED_SUFFIX = ".unique";
|
||||
|
||||
/**
|
||||
* @param other another variant context
|
||||
*
|
||||
* throws an exception if there is a collision such that the same sample exists in both contexts
|
||||
* @return a context representing the merge of this context and the other provided context
|
||||
*/
|
||||
// public VariantContext merge(VariantContext left, VariantContext other) {
|
||||
// return merge(left, other, false);
|
||||
// }
|
||||
|
||||
/**
|
||||
* @param other another variant context
|
||||
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
|
||||
* the samples will be uniquified(based on their sources);
|
||||
* otherwise, an exception will be thrown
|
||||
*
|
||||
* @return a context representing the merge of this context and the other provided context
|
||||
*/
|
||||
// public VariantContext merge(VariantContext left, VariantContext other, boolean uniquifySamples) {
|
||||
// // todo -- make functional
|
||||
//
|
||||
// if ( !left.getLocation().equals(other.getLocation()) )
|
||||
// throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
|
||||
//
|
||||
// Set<String> samples = left.getSampleNames();
|
||||
// Set<Genotype> Gs = new HashSet<Genotype>(left.getGenotypes().values());
|
||||
//
|
||||
// for ( Genotype g : other.getGenotypes().values() ) {
|
||||
// if ( samples.contains(g.getSampleName()) ) {
|
||||
// if ( uniquifySamples )
|
||||
// g.setSampleName(g.getSampleName() + UNIQUIFIED_SUFFIX);
|
||||
// else
|
||||
// throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
|
||||
// }
|
||||
// Gs.add(g);
|
||||
// }
|
||||
//
|
||||
// HashMap<Object, Object> attrs = new HashMap<Object, Object>(left.getAttributes());
|
||||
// attrs.putAll(other.getAttributes());
|
||||
//
|
||||
// return new VariantContext(left, Gs, attrs);
|
||||
// }
|
||||
|
||||
|
||||
/**
|
||||
* @param subclass the name of a subclass of variants to select
|
||||
*
|
||||
* @return a subset of this context which selects based on the given subclass
|
||||
*/
|
||||
// public VariantContextUtils select(String subclass) {
|
||||
// HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||
// for ( Genotype g : genotypes ) {
|
||||
// if ( g.getAttribute(subclass) != null )
|
||||
// Gs.add(g);
|
||||
// }
|
||||
// return createNewContext(Gs, attributes);
|
||||
// }
|
||||
|
||||
/**
|
||||
* @param expr a jexl expression describing how to filter this context
|
||||
*
|
||||
* @return a subset of this context which is filtered based on the given expression
|
||||
*/
|
||||
// public VariantContextUtils filter(String expr) {
|
||||
// HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||
// try {
|
||||
// Expression filterExpression = ExpressionFactory.createExpression(expr);
|
||||
//
|
||||
// for ( Genotype g : genotypes ) {
|
||||
// JexlContext jContext = JexlHelper.createContext();
|
||||
// jContext.setVars(g.getAttributes());
|
||||
// if ( (Boolean)filterExpression.evaluate(jContext) )
|
||||
// Gs.add(g);
|
||||
// }
|
||||
//
|
||||
// } catch (Exception e) {
|
||||
// throw new StingException("JEXL error in VariantContext: " + e.getMessage());
|
||||
// }
|
||||
//
|
||||
// return createNewContext(Gs, attributes);
|
||||
// }
|
||||
|
||||
/**
|
||||
* @return a set of new variant contexts, one for each sample from this context
|
||||
*/
|
||||
// public Set<VariantContextUtils> splitBySample() {
|
||||
// Set<VariantContextUtils> contexts = new HashSet<VariantContextUtils>();
|
||||
// for ( Genotype g : genotypes ) {
|
||||
// HashSet<Genotype> gAsSet = new HashSet<Genotype>();
|
||||
// gAsSet.add(g);
|
||||
// contexts.add(createNewContext(gAsSet, attributes));
|
||||
// }
|
||||
// return contexts;
|
||||
// }
|
||||
|
||||
|
||||
public static double computeHardyWeinbergPvalue(VariantContext vc) {
|
||||
if ( vc.getChromosomeCount() == 0 )
|
||||
return 0.0;
|
||||
return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount());
|
||||
}
|
||||
|
||||
|
||||
public static VariantContext simpleMerge(Set<VariantContext> VCs) {
|
||||
if ( VCs == null || VCs.size() == 0 )
|
||||
return null;
|
||||
|
||||
Iterator<VariantContext> iter = VCs.iterator();
|
||||
|
||||
// establish the baseline info from the first VC
|
||||
VariantContext first = iter.next();
|
||||
String name = first.getName();
|
||||
GenomeLoc loc = first.getLocation();
|
||||
Set<Allele> alleles = new HashSet<Allele>(first.getAlleles());
|
||||
Map<String, Genotype> genotypes = new HashMap<String, Genotype>(first.getGenotypes());
|
||||
double negLog10PError = first.isVariant() ? first.getNegLog10PError() : -1;
|
||||
Set<String> filters = new HashSet<String>(first.getFilters());
|
||||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
int depth = 0;
|
||||
if ( first.hasAttribute(VCFRecord.DEPTH_KEY) )
|
||||
depth = Integer.valueOf(first.getAttribute(VCFRecord.DEPTH_KEY).toString());
|
||||
|
||||
// cycle through and add info from the other VCs, making sure the loc/reference matches
|
||||
while ( iter.hasNext() ) {
|
||||
VariantContext vc = iter.next();
|
||||
if ( !loc.equals(vc.getLocation()) || !first.getReference().equals(vc.getReference()) )
|
||||
return null;
|
||||
|
||||
alleles.addAll(vc.getAlleles());
|
||||
genotypes.putAll(vc.getGenotypes());
|
||||
|
||||
negLog10PError = Math.max(negLog10PError, vc.isVariant() ? vc.getNegLog10PError() : -1);
|
||||
|
||||
filters.addAll(vc.getFilters());
|
||||
if ( vc.hasAttribute(VCFRecord.DEPTH_KEY) )
|
||||
depth += Integer.valueOf(vc.getAttribute(VCFRecord.DEPTH_KEY).toString());
|
||||
}
|
||||
|
||||
if ( depth > 0 )
|
||||
attributes.put(VCFRecord.DEPTH_KEY, String.valueOf(depth));
|
||||
return new VariantContext(name, loc, alleles, genotypes, negLog10PError, filters, attributes);
|
||||
}
|
||||
}
|
||||
|
|
@ -529,7 +529,7 @@ public class PlinkRod extends BasicReferenceOrderedDatum implements Iterator<Pli
|
|||
if ( PlinkRod.SEQUENOM_NO_CALL.equals(alleleString) )
|
||||
allele = Allele.NO_CALL;
|
||||
else
|
||||
allele = new Allele(alleleString);
|
||||
allele = Allele.create(alleleString);
|
||||
alleles.put(alleleString, allele);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,27 +52,18 @@ public class VariantContextAdaptors {
|
|||
adaptors.put(RodGLF.class, new GLFAdaptor());
|
||||
adaptors.put(GeliTextFeature.class, new GeliTextAdaptor());
|
||||
adaptors.put(rodGELI.class, new GeliAdaptor());
|
||||
adaptors.put(VariantContext.class, new VariantContextAdaptor());
|
||||
}
|
||||
|
||||
public static boolean canBeConvertedToVariantContext(Object variantContainingObject) {
|
||||
return adaptors.containsKey(variantContainingObject.getClass());
|
||||
// return convertToVariantContext(name, variantContainingObject) != null;
|
||||
}
|
||||
|
||||
/** generic superclass */
|
||||
private static abstract class VCAdaptor {
|
||||
// abstract VariantContext convert(String name, Object input);
|
||||
abstract VariantContext convert(String name, Object input, ReferenceContext ref);
|
||||
}
|
||||
|
||||
// public static VariantContext toVariantContext(String name, Object variantContainingObject) {
|
||||
// if ( ! adaptors.containsKey(variantContainingObject.getClass()) )
|
||||
// return null;
|
||||
// else {
|
||||
// return adaptors.get(variantContainingObject.getClass()).convert(name, variantContainingObject);
|
||||
// }
|
||||
// }
|
||||
|
||||
public static VariantContext toVariantContext(String name, Object variantContainingObject, ReferenceContext ref) {
|
||||
if ( ! adaptors.containsKey(variantContainingObject.getClass()) )
|
||||
return null;
|
||||
|
|
@ -87,7 +78,12 @@ public class VariantContextAdaptors {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
|
||||
private static class VariantContextAdaptor extends VCAdaptor {
|
||||
// already a VC, just cast and return it
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
return (VariantContext)input;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -96,15 +92,11 @@ public class VariantContextAdaptors {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private static class DBSnpAdaptor extends VCAdaptor {
|
||||
// VariantContext convert(String name, Object input) {
|
||||
// return convert(name, input, null);
|
||||
// }
|
||||
|
||||
VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
DbSNPFeature dbsnp = (DbSNPFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp)) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(DbSNPHelper.getReference(dbsnp), true);
|
||||
Allele refAllele = Allele.create(DbSNPHelper.getReference(dbsnp), true);
|
||||
|
||||
if ( DbSNPHelper.isSNP(dbsnp) || DbSNPHelper.isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
||||
// add the reference allele
|
||||
|
|
@ -117,14 +109,13 @@ public class VariantContextAdaptors {
|
|||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
return null;
|
||||
}
|
||||
alleles.add(new Allele(alt, false));
|
||||
alleles.add(Allele.create(alt, false));
|
||||
}
|
||||
|
||||
Map<String, String> attributes = new HashMap<String, String>();
|
||||
attributes.put("ID", dbsnp.getRsID());
|
||||
Collection<Genotype> genotypes = null;
|
||||
VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(dbsnp.getChr(),dbsnp.getStart(),dbsnp.getEnd()), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
|
|
@ -165,9 +156,9 @@ public class VariantContextAdaptors {
|
|||
if ( vcf.isDeletion() && refAllele.length() > alt.getLength() ) {
|
||||
byte[] semiDeletion = new byte[refAllele.length() - alt.getLength()];
|
||||
System.arraycopy(ref.getBases(), alt.getLength(), semiDeletion, 0, refAllele.length() - alt.getLength());
|
||||
allele = new Allele(String.valueOf(semiDeletion), false);
|
||||
allele = Allele.create(String.valueOf(semiDeletion), false);
|
||||
} else {
|
||||
allele = new Allele(alt.getBases(), false);
|
||||
allele = Allele.create(alt.getBases(), false);
|
||||
}
|
||||
if ( ! allele.isNoCall() )
|
||||
alleles.add(allele);
|
||||
|
|
@ -211,7 +202,6 @@ public class VariantContextAdaptors {
|
|||
loc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart(), loc.getStart()+refAllele.length()-1);
|
||||
|
||||
VariantContext vc = new VariantContext(name, loc, alleles, genotypes, qual, filters, attributes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
|
|
@ -223,11 +213,11 @@ public class VariantContextAdaptors {
|
|||
|
||||
Allele refAllele;
|
||||
if ( vcf.isInsertion() ) {
|
||||
refAllele = new Allele(Allele.NULL_ALLELE_STRING, true);
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
// } else if ( ref == null ) {
|
||||
// refAllele = new Allele(vcf.getReference(), true);
|
||||
// refAllele = Allele.create(vcf.getReference(), true);
|
||||
} else if ( !vcf.isIndel() ) {
|
||||
refAllele = new Allele(ref.getBase(), true);
|
||||
refAllele = Allele.create(ref.getBase(), true);
|
||||
} else if ( vcf.isDeletion() ) {
|
||||
int start = (int)(ref.getLocus().getStart() - ref.getWindow().getStart() + 1);
|
||||
int delLength = 0;
|
||||
|
|
@ -249,7 +239,7 @@ public class VariantContextAdaptors {
|
|||
private static Allele deletionAllele(ReferenceContext ref, int start, int len) {
|
||||
byte[] deletion = new byte[len];
|
||||
System.arraycopy(ref.getBases(), start, deletion, 0, len);
|
||||
return new Allele(deletion, true);
|
||||
return Allele.create(deletion, true);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -458,11 +448,11 @@ public class VariantContextAdaptors {
|
|||
VCAllele = plinkAlleleToVCAllele.get(myPlinkAllele);
|
||||
} else {
|
||||
if ( !plink.isIndel() ) {
|
||||
VCAllele = new Allele(myPlinkAllele.getBases(), refAllele.equals(myPlinkAllele, true));
|
||||
VCAllele = Allele.create(myPlinkAllele.getBases(), refAllele.equals(myPlinkAllele, true));
|
||||
} else if ( myPlinkAllele.isNull() ) {
|
||||
VCAllele = new Allele(Allele.NULL_ALLELE_STRING, plink.isInsertion());
|
||||
VCAllele = Allele.create(Allele.NULL_ALLELE_STRING, plink.isInsertion());
|
||||
} else {
|
||||
VCAllele = new Allele(myPlinkAllele.getBases(), !plink.isInsertion());
|
||||
VCAllele = Allele.create(myPlinkAllele.getBases(), !plink.isInsertion());
|
||||
}
|
||||
plinkAlleleToVCAllele.put(myPlinkAllele, VCAllele);
|
||||
VCAlleles.add(VCAllele);
|
||||
|
|
@ -480,7 +470,6 @@ public class VariantContextAdaptors {
|
|||
try {
|
||||
GenomeLoc loc = GenomeLocParser.setStop(plink.getLocation(), plink.getLocation().getStop() + plink.getLength()-1);
|
||||
VariantContext vc = new VariantContext(plink.getVariantName(), loc, VCAlleles, genotypes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new IllegalArgumentException(e.getMessage() + "; please make sure that e.g. a sample isn't present more than one time in your ped file");
|
||||
|
|
@ -490,9 +479,9 @@ public class VariantContextAdaptors {
|
|||
private Allele determineRefAllele(PlinkRod plink, ReferenceContext ref) {
|
||||
Allele refAllele;
|
||||
if ( !plink.isIndel() ) {
|
||||
refAllele = new Allele(ref.getBase(), true);
|
||||
refAllele = Allele.create(ref.getBase(), true);
|
||||
} else if ( plink.isInsertion() ) {
|
||||
refAllele = new Allele(Allele.NULL_ALLELE_STRING, true);
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
} else {
|
||||
long maxLength = ref.getWindow().getStop() - ref.getLocus().getStop();
|
||||
if ( plink.getLength() > maxLength )
|
||||
|
|
@ -532,7 +521,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
if ( ! Allele.acceptableAlleleBases(glf.getReference()) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(glf.getReference(), true);
|
||||
Allele refAllele = Allele.create(glf.getReference(), true);
|
||||
|
||||
// make sure we can convert it
|
||||
if ( glf.isSNP() || glf.isIndel()) {
|
||||
|
|
@ -545,7 +534,7 @@ public class VariantContextAdaptors {
|
|||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
return null;
|
||||
}
|
||||
Allele allele = new Allele(alt, false);
|
||||
Allele allele = Allele.create(alt, false);
|
||||
if (!alleles.contains(allele)) alleles.add(allele);
|
||||
}
|
||||
|
||||
|
|
@ -570,7 +559,6 @@ public class VariantContextAdaptors {
|
|||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
VariantContext vc = new VariantContext(name, glf.getLocation(), alleles, genotypes, glf.getNegLog10PError(), null, attributes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
|
|
@ -605,7 +593,7 @@ public class VariantContextAdaptors {
|
|||
GeliTextFeature geli = (GeliTextFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf(geli.getRefBase())) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(String.valueOf(geli.getRefBase()), true);
|
||||
Allele refAllele = Allele.create(String.valueOf(geli.getRefBase()), true);
|
||||
|
||||
// make sure we can convert it
|
||||
if ( geli.getGenotype().isHet() || !geli.getGenotype().containsBase(geli.getRefBase())) {
|
||||
|
|
@ -617,7 +605,7 @@ public class VariantContextAdaptors {
|
|||
if ( ! Allele.acceptableAlleleBases(String.valueOf(alt)) ) {
|
||||
return null;
|
||||
}
|
||||
Allele allele = new Allele(String.valueOf(alt), false);
|
||||
Allele allele = Allele.create(String.valueOf(alt), false);
|
||||
if (!alleles.contains(allele) && !refAllele.basesMatch(allele.getBases())) alleles.add(allele);
|
||||
|
||||
// add the allele, first checking if it's reference or not
|
||||
|
|
@ -638,7 +626,6 @@ public class VariantContextAdaptors {
|
|||
genotypes.add(call);
|
||||
alleles.add(refAllele);
|
||||
VariantContext vc = new VariantContext(name, GenomeLocParser.createGenomeLoc(geli.getChr(),geli.getStart()), alleles, genotypes, geli.getLODBestToReference(), null, attributes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
|
|
@ -673,7 +660,7 @@ public class VariantContextAdaptors {
|
|||
GenotypeLikelihoods geli = ((rodGELI)input).getGeliRecord();
|
||||
if ( ! Allele.acceptableAlleleBases(String.valueOf((char)geli.getReferenceBase())) )
|
||||
return null;
|
||||
Allele refAllele = new Allele(String.valueOf((char)geli.getReferenceBase()), true);
|
||||
Allele refAllele = Allele.create(String.valueOf((char)geli.getReferenceBase()), true);
|
||||
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
|
|
@ -683,14 +670,14 @@ public class VariantContextAdaptors {
|
|||
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele1()))) {
|
||||
return null;
|
||||
}
|
||||
Allele allele1 = new Allele(String.valueOf((char) geli.getBestGenotype().getAllele1()), false);
|
||||
Allele allele1 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele1()), false);
|
||||
if (!alleles.contains(allele1) && !allele1.equals(refAllele, true)) alleles.add(allele1);
|
||||
|
||||
// add the two alt alleles
|
||||
if (!Allele.acceptableAlleleBases(String.valueOf((char) geli.getBestGenotype().getAllele2()))) {
|
||||
return null;
|
||||
}
|
||||
Allele allele2 = new Allele(String.valueOf((char) geli.getBestGenotype().getAllele2()), false);
|
||||
Allele allele2 = Allele.create(String.valueOf((char) geli.getBestGenotype().getAllele2()), false);
|
||||
if (!alleles.contains(allele2) && !allele2.equals(refAllele, true)) alleles.add(allele2);
|
||||
|
||||
|
||||
|
|
@ -712,7 +699,6 @@ public class VariantContextAdaptors {
|
|||
// add the call to the genotype list, and then use this list to create a VariantContext
|
||||
genotypes.add(call);
|
||||
VariantContext vc = new VariantContext(name, ((rodGELI) input).getLocation(), alleles, genotypes, geli.getBestToReferenceLod(), null, attributes);
|
||||
vc.validate();
|
||||
return vc;
|
||||
|
||||
}
|
||||
|
|
@ -750,7 +736,7 @@ public class VariantContextAdaptors {
|
|||
|
||||
// add the reference allele
|
||||
HashSet<Allele> alleles = new HashSet<Allele>();
|
||||
Allele refAllele = new Allele(ref.getBase(), true);
|
||||
Allele refAllele = Allele.create(ref.getBase(), true);
|
||||
alleles.add(refAllele);
|
||||
|
||||
// make a mapping from sample to genotype
|
||||
|
|
@ -766,8 +752,8 @@ public class VariantContextAdaptors {
|
|||
String a1 = genotypeStrings[i].substring(0,1);
|
||||
String a2 = genotypeStrings[i].substring(1);
|
||||
|
||||
Allele allele1 = new Allele(a1, refAllele.basesMatch(a1));
|
||||
Allele allele2 = new Allele(a2, refAllele.basesMatch(a2));
|
||||
Allele allele1 = Allele.create(a1, refAllele.basesMatch(a1));
|
||||
Allele allele2 = Allele.create(a2, refAllele.basesMatch(a2));
|
||||
|
||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>(2);
|
||||
myAlleles.add(allele1);
|
||||
|
|
@ -780,7 +766,6 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
|
||||
VariantContext vc = new VariantContext(name, hapmap.getLocation(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, new HashMap<String, String>());
|
||||
vc.validate();
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,10 +14,13 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
import com.sun.xml.internal.ws.wsdl.parser.ParserUtil;
|
||||
|
||||
/**
|
||||
* a feature codec for the VCF 4 specification. Our aim is to read in the records and convert to VariantContext as
|
||||
* quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
|
||||
|
|
@ -29,9 +32,11 @@ public class VCF4Codec implements FeatureCodec {
|
|||
private VCFHeader header = null;
|
||||
|
||||
public VCF4Codec() {
|
||||
throw new StingException("DON'T USE THIS");
|
||||
this(true);
|
||||
//throw new StingException("DON'T USE THIS");
|
||||
}
|
||||
|
||||
// todo -- remove me when done
|
||||
public VCF4Codec(boolean itsOKImTesting) {
|
||||
if ( ! itsOKImTesting )
|
||||
throw new StingException("DON'T USE THIS");
|
||||
|
|
@ -83,8 +88,6 @@ public class VCF4Codec implements FeatureCodec {
|
|||
|
||||
private static Map<String, List<Allele>> alleleMap = new HashMap<String, List<Allele>>(3);
|
||||
|
||||
static long cacheHit = 0, gtParse = 0;
|
||||
|
||||
private static List<Allele> parseGenotypeAlleles(String GT, List<Allele> alleles, Map<String, List<Allele>> cache) {
|
||||
// this should cache results [since they are immutable] and return a single object for each genotype
|
||||
if ( GT.length() != 3 ) throw new StingException("Unreasonable number of alleles"); // 0/1 => barf on 10/0
|
||||
|
|
@ -93,13 +96,6 @@ public class VCF4Codec implements FeatureCodec {
|
|||
GTAlleles = Arrays.asList(oneAllele(GT.charAt(0), alleles), oneAllele(GT.charAt(2), alleles));
|
||||
cache.put(GT, GTAlleles);
|
||||
}
|
||||
// else {
|
||||
// cacheHit++;
|
||||
// }
|
||||
// gtParse++;
|
||||
//
|
||||
// if ( cacheHit % 10000 == 0 )
|
||||
// System.out.printf("Cache hit %d %d %.2f%n", cacheHit, gtParse, (100.0*cacheHit) / gtParse);
|
||||
|
||||
return GTAlleles;
|
||||
}
|
||||
|
|
@ -108,10 +104,11 @@ public class VCF4Codec implements FeatureCodec {
|
|||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
||||
if ( ! infoField.equals(".") ) { // empty info field
|
||||
for ( String field : infoField.split(";") ) {
|
||||
for ( String field : Utils.split(infoField, ";") ) {
|
||||
String key;
|
||||
Object value;
|
||||
|
||||
int eqI = field.indexOf("=");
|
||||
String key = null;
|
||||
Object value = null;
|
||||
if ( eqI != -1 ) {
|
||||
key = field.substring(0, eqI);
|
||||
value = field.substring(eqI+1, field.length()); // todo -- needs to convert to int, double, etc
|
||||
|
|
@ -124,6 +121,7 @@ public class VCF4Codec implements FeatureCodec {
|
|||
attributes.put(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
attributes.put("ID", id);
|
||||
return attributes;
|
||||
}
|
||||
|
|
@ -156,89 +154,121 @@ public class VCF4Codec implements FeatureCodec {
|
|||
}
|
||||
|
||||
private static Double parseQual(String qualString) {
|
||||
return qualString.equals("-1") ? VariantContext.NO_NEG_LOG_10PERROR : Double.valueOf(qualString) / 10;
|
||||
// todo -- remove double once we deal with annoying VCFs from 1KG
|
||||
return qualString.equals("-1") || qualString.equals("-1.0") ? VariantContext.NO_NEG_LOG_10PERROR : Double.valueOf(qualString) / 10;
|
||||
}
|
||||
|
||||
private List<Allele> parseAlleles(String ref, String alts) {
|
||||
List<Allele> alleles = new ArrayList<Allele>(2); // we are almost always biallelic
|
||||
|
||||
// ref
|
||||
checkAllele(ref);
|
||||
Allele refAllele = Allele.create(ref, true);
|
||||
alleles.add(refAllele);
|
||||
|
||||
if ( alts.indexOf(",") == -1 ) { // only 1 alternatives, don't call string split
|
||||
parse1Allele(alleles, alts);
|
||||
} else {
|
||||
for ( String alt : Utils.split(alts, ",") ) {
|
||||
parse1Allele(alleles, alt);
|
||||
}
|
||||
}
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
||||
private static void checkAllele(String allele) {
|
||||
if ( ! Allele.acceptableAlleleBases(allele) ) {
|
||||
throw new StingException("Unparsable vcf record with allele " + allele);
|
||||
}
|
||||
}
|
||||
|
||||
private void parse1Allele(List<Allele> alleles, String alt) {
|
||||
checkAllele(alt);
|
||||
|
||||
Allele allele = Allele.create(alt, false);
|
||||
if ( ! allele.isNoCall() )
|
||||
alleles.add(allele);
|
||||
}
|
||||
|
||||
// todo -- check a static map from filter String to HashSets to reuse objects and avoid parsing
|
||||
private Set<String> parseFilters(String filterString) {
|
||||
if ( filterString.equals(".") )
|
||||
return null;
|
||||
else {
|
||||
HashSet<String> s = new HashSet<String>(1);
|
||||
if ( filterString.indexOf(";") == -1 ) {
|
||||
s.add(filterString);
|
||||
} else {
|
||||
s.addAll(Utils.split(filterString, ";"));
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
private String[] GTKeys = new String[100];
|
||||
|
||||
private VariantContext parseVCFLine(String[] parts, int nParts) {
|
||||
// chr5 157273992 rs1211159 C T 0.00 0 . GT 0/0 0/0 0
|
||||
String contig = parts[0];
|
||||
long pos = Long.valueOf(parts[1]);
|
||||
String id = parts[2];
|
||||
String ref = parts[3];
|
||||
String alts = parts[4];
|
||||
String ref = parts[3].toUpperCase();
|
||||
String alts = parts[4].toUpperCase();
|
||||
Double qual = parseQual(parts[5]);
|
||||
String filter = parts[6];
|
||||
String info = parts[7];
|
||||
String GT = parts[8];
|
||||
int genotypesStart = 9;
|
||||
|
||||
// add the reference allele
|
||||
if ( ! Allele.acceptableAlleleBases(ref) ) {
|
||||
System.out.printf("Excluding vcf record %s%n", ref);
|
||||
return null;
|
||||
}
|
||||
|
||||
Set<String> filters = ! filter.equals(".") ? new HashSet<String>(Arrays.asList(filter.split(";"))) : null;
|
||||
List<Allele> alleles = parseAlleles(ref, alts);
|
||||
Set<String> filters = parseFilters(filter);
|
||||
Map<String, Object> attributes = parseInfo(info, id);
|
||||
|
||||
// add all of the alt alleles
|
||||
|
||||
// todo -- use Allele factor method, not new, so we can keep a cache of the alleles since they are always the same
|
||||
List<Allele> alleles = new ArrayList<Allele>(2); // we are almost always biallelic
|
||||
Allele refAllele = new Allele(ref, true);
|
||||
alleles.add(refAllele);
|
||||
|
||||
for ( String alt : alts.split(",") ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
//System.out.printf("Excluding vcf record %s%n", vcf);
|
||||
return null;
|
||||
}
|
||||
|
||||
Allele allele = new Allele(alt, false);
|
||||
if ( ! allele.isNoCall() )
|
||||
alleles.add(allele);
|
||||
}
|
||||
|
||||
String[] GTKeys = GT.split(":"); // to performance issue
|
||||
|
||||
Map<String, Genotype> genotypes = new HashMap<String, Genotype>(nParts);
|
||||
// parse genotypes
|
||||
int nGTKeys = ParsingUtils.split(GT, GTKeys, ':');
|
||||
Map<String, Genotype> genotypes = new HashMap<String, Genotype>(Math.max(nParts - genotypesStart, 1));
|
||||
if ( parseGenotypesToo ) {
|
||||
alleleMap.clear();
|
||||
for ( int genotypeOffset = genotypesStart; genotypeOffset < nParts; genotypeOffset++ ) {
|
||||
String sample = parts[genotypeOffset];
|
||||
String[] GTValues = CachedGTValues;
|
||||
ParsingUtils.split(sample, GTValues, ':'); // to performance issue
|
||||
ParsingUtils.split(sample, GTValues, ':');
|
||||
List<Allele> genotypeAlleles = parseGenotypeAlleles(GTValues[0], alleles, alleleMap);
|
||||
double GTQual = VariantContext.NO_NEG_LOG_10PERROR;
|
||||
|
||||
Set<String> genotypeFilters = null;
|
||||
|
||||
// todo -- the parsing of attributes could be made lazy for performance
|
||||
Map<String, String> gtAttributes = null;
|
||||
if ( GTKeys.length > 1 ) {
|
||||
gtAttributes = new HashMap<String, String>(GTKeys.length - 1);
|
||||
for ( int i = 1; i < GTKeys.length; i++ ) {
|
||||
if ( nGTKeys > 1 ) {
|
||||
gtAttributes = new HashMap<String, String>(nGTKeys - 1);
|
||||
for ( int i = 1; i < nGTKeys; i++ ) {
|
||||
if ( GTKeys[i].equals("GQ") ) {
|
||||
GTQual = parseQual(GTValues[i]);
|
||||
} if ( GTKeys[i].equals("FL") ) { // deal with genotype filters here
|
||||
// todo -- get genotype filters working
|
||||
// genotypeFilters = new HashSet<String>();
|
||||
// if ( vcfG.isFiltered() ) // setup the FL genotype filter fields
|
||||
// genotypeFilters.addAll(Arrays.asList(vcfG.getFields().get(VCFGenotypeRecord.GENOTYPE_FILTER_KEY).split(";")));
|
||||
} else {
|
||||
gtAttributes.put(GTKeys[i], GTValues[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Set<String> genotypeFilters = null;
|
||||
// genotypeFilters = new HashSet<String>();
|
||||
// if ( vcfG.isFiltered() ) // setup the FL genotype filter fields
|
||||
// genotypeFilters.addAll(Arrays.asList(vcfG.getFields().get(VCFGenotypeRecord.GENOTYPE_FILTER_KEY).split(";")));
|
||||
|
||||
boolean phased = GTKeys[0].charAt(1) == '|';
|
||||
|
||||
// todo -- actually parse the header to get the sample name
|
||||
Genotype g = new Genotype("X" + genotypeOffset, genotypeAlleles, GTQual, genotypeFilters, gtAttributes, phased);
|
||||
genotypes.put(g.getSampleName(), g);
|
||||
}
|
||||
}
|
||||
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(contig,pos,pos+refAllele.length()-1);
|
||||
// todo -- doesn't work for indels [the whole reason for VCF4]
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(contig,pos,pos + ref.length() - 1);
|
||||
|
||||
// todo -- we need access to our track name to name the variant context
|
||||
VariantContext vc = new VariantContext("foo", loc, alleles, genotypes, qual, filters, attributes);
|
||||
if ( validate ) vc.validate();
|
||||
return vc;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.contexts.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
|
|
@ -145,7 +146,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
|
|||
}
|
||||
|
||||
// merge the variant contexts
|
||||
return VariantContext.simpleMerge(calls);
|
||||
return VariantContextUtils.simpleMerge(calls);
|
||||
}
|
||||
|
||||
private AlignmentContext filterForSamples(ReadBackedPileup pileup, Set<String> samples) {
|
||||
|
|
|
|||
|
|
@ -128,8 +128,8 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
|
||||
// set up some variables we'll need in the loop
|
||||
AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
|
||||
Allele refAllele = new Allele(ref, true);
|
||||
Allele altAllele = new Allele(alt, false);
|
||||
Allele refAllele = Allele.create(ref, true);
|
||||
Allele altAllele = Allele.create(alt, false);
|
||||
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
|
||||
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(ref, alt);
|
||||
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
|
||||
|
|
|
|||
|
|
@ -379,9 +379,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
|
||||
// next, the variant context data (alleles, attributes, etc.)
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(new Allele(ref, true));
|
||||
alleles.add(Allele.create(ref, true));
|
||||
if ( bestAFguess != 0 )
|
||||
alleles.add(new Allele(bestAlternateAllele, false));
|
||||
alleles.add(Allele.create(bestAlternateAllele, false));
|
||||
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
|
|
|||
|
|
@ -63,12 +63,12 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
|
|||
List<Allele> alleles = new ArrayList<Allele>(2);
|
||||
|
||||
if ( bestEvent.charAt(0) == '+') {
|
||||
alleles.add( new Allele(Allele.NULL_ALLELE_STRING,true) );
|
||||
alleles.add( new Allele(bestEvent.substring(1), false ));
|
||||
alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,true) );
|
||||
alleles.add( Allele.create(bestEvent.substring(1), false ));
|
||||
} else {
|
||||
if ( bestEvent.charAt(0) == '-' ) {
|
||||
alleles.add( new Allele(Allele.NULL_ALLELE_STRING,false) );
|
||||
alleles.add( new Allele(bestEvent.substring(1), true ));
|
||||
alleles.add( Allele.create(Allele.NULL_ALLELE_STRING,false) );
|
||||
alleles.add( Allele.create(bestEvent.substring(1), true ));
|
||||
loc = GenomeLocParser.setStop(loc, loc.getStop() + bestEvent.length()-2);
|
||||
} else
|
||||
throw new StingException("Internal error (probably a bug): event does not conform to expected format: "+ bestEvent);
|
||||
|
|
|
|||
|
|
@ -53,17 +53,17 @@ public class AlleleUnitTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void before() {
|
||||
del = new Allele("-");
|
||||
delRef = new Allele("-", true);
|
||||
del = Allele.create("-");
|
||||
delRef = Allele.create("-", true);
|
||||
|
||||
A = new Allele("A");
|
||||
ARef = new Allele("A", true);
|
||||
T = new Allele("T");
|
||||
A = Allele.create("A");
|
||||
ARef = Allele.create("A", true);
|
||||
T = Allele.create("T");
|
||||
|
||||
ATIns = new Allele("AT");
|
||||
ATCIns = new Allele("ATC");
|
||||
ATIns = Allele.create("AT");
|
||||
ATCIns = Allele.create("ATC");
|
||||
|
||||
NoCall = new Allele(".");
|
||||
NoCall = Allele.create(".");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -126,10 +126,10 @@ public class AlleleUnitTest extends BaseTest {
|
|||
public void testConstructors1() {
|
||||
logger.warn("testConstructors1");
|
||||
|
||||
Allele a1 = new Allele("A");
|
||||
Allele a2 = new Allele("A".getBytes());
|
||||
Allele a3 = new Allele("a");
|
||||
Allele a4 = new Allele("A", true);
|
||||
Allele a1 = Allele.create("A");
|
||||
Allele a2 = Allele.create("A".getBytes());
|
||||
Allele a3 = Allele.create("A");
|
||||
Allele a4 = Allele.create("A", true);
|
||||
|
||||
Assert.assertTrue(a1.equals(a2));
|
||||
Assert.assertTrue(a1.equals(a3));
|
||||
|
|
@ -140,10 +140,10 @@ public class AlleleUnitTest extends BaseTest {
|
|||
public void testDelConstructors() {
|
||||
logger.warn("testDelConstructors");
|
||||
|
||||
Allele a1 = new Allele("-");
|
||||
Allele a2 = new Allele("-".getBytes());
|
||||
Allele a3 = new Allele("");
|
||||
Allele a4 = new Allele("", true);
|
||||
Allele a1 = Allele.create("-");
|
||||
Allele a2 = Allele.create("-".getBytes());
|
||||
Allele a3 = Allele.create("");
|
||||
Allele a4 = Allele.create("", true);
|
||||
|
||||
Assert.assertTrue(a1.equals(a2));
|
||||
Assert.assertTrue(a1.equals(a3));
|
||||
|
|
@ -154,10 +154,10 @@ public class AlleleUnitTest extends BaseTest {
|
|||
public void testInsConstructors() {
|
||||
logger.warn("testInsConstructors");
|
||||
|
||||
Allele a1 = new Allele("AC");
|
||||
Allele a2 = new Allele("AC".getBytes());
|
||||
Allele a3 = new Allele("Ac");
|
||||
Allele a4 = new Allele("AC", true);
|
||||
Allele a1 = Allele.create("AC");
|
||||
Allele a2 = Allele.create("AC".getBytes());
|
||||
Allele a3 = Allele.create("AC");
|
||||
Allele a4 = Allele.create("AC", true);
|
||||
|
||||
Assert.assertTrue(a1.equals(a2));
|
||||
Assert.assertTrue(a1.equals(a3));
|
||||
|
|
@ -188,38 +188,38 @@ public class AlleleUnitTest extends BaseTest {
|
|||
Assert.assertFalse(ATIns.basesMatch("A"));
|
||||
Assert.assertFalse(ATIns.basesMatch("ATC"));
|
||||
|
||||
Assert.assertTrue(ATIns.basesMatch("at"));
|
||||
Assert.assertFalse(ATIns.basesMatch("atc"));
|
||||
Assert.assertTrue(ATIns.basesMatch("AT"));
|
||||
Assert.assertFalse(ATIns.basesMatch("ATC"));
|
||||
}
|
||||
|
||||
@Test (expected = IllegalArgumentException.class)
|
||||
public void testBadConstructorArgs1() {
|
||||
logger.warn("testBadConstructorArgs1");
|
||||
byte[] foo = null;
|
||||
new Allele(foo);
|
||||
Allele.create(foo);
|
||||
}
|
||||
|
||||
@Test (expected = IllegalArgumentException.class)
|
||||
public void testBadConstructorArgs2() {
|
||||
logger.warn("testBadConstructorArgs2");
|
||||
new Allele("x");
|
||||
Allele.create("x");
|
||||
}
|
||||
|
||||
@Test (expected = IllegalArgumentException.class)
|
||||
public void testBadConstructorArgs3() {
|
||||
logger.warn("testBadConstructorArgs3");
|
||||
new Allele("--");
|
||||
Allele.create("--");
|
||||
}
|
||||
|
||||
@Test (expected = IllegalArgumentException.class)
|
||||
public void testBadConstructorArgs4() {
|
||||
logger.warn("testBadConstructorArgs4");
|
||||
new Allele("-A");
|
||||
Allele.create("-A");
|
||||
}
|
||||
|
||||
@Test (expected = IllegalArgumentException.class)
|
||||
public void testBadConstructorArgs5() {
|
||||
logger.warn("testBadConstructorArgs5");
|
||||
new Allele("A A");
|
||||
Allele.create("A A");
|
||||
}
|
||||
}
|
||||
|
|
@ -50,16 +50,16 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void before() {
|
||||
del = new Allele("-");
|
||||
delRef = new Allele("-", true);
|
||||
del = Allele.create("-");
|
||||
delRef = Allele.create("-", true);
|
||||
|
||||
A = new Allele("A");
|
||||
Aref = new Allele("A", true);
|
||||
T = new Allele("T");
|
||||
Tref = new Allele("T", true);
|
||||
A = Allele.create("A");
|
||||
Aref = Allele.create("A", true);
|
||||
T = Allele.create("T");
|
||||
Tref = Allele.create("T", true);
|
||||
|
||||
ATC = new Allele("ATC");
|
||||
ATCref = new Allele("ATC", true);
|
||||
ATC = Allele.create("ATC");
|
||||
ATCref = Allele.create("ATC", true);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -101,8 +101,8 @@ public class VariantContextUnitTest extends BaseTest {
|
|||
VariantContext vc = new VariantContext("test", snpLoc, alleles);
|
||||
logger.warn("vc = " + vc);
|
||||
|
||||
Assert.assertEquals(vc.getLocation(), snpLoc);
|
||||
Assert.assertEquals(vc.getType(), VariantContext.Type.NO_VARIATION);
|
||||
Assert.assertEquals(snpLoc, vc.getLocation());
|
||||
Assert.assertEquals(VariantContext.Type.NO_VARIATION, vc.getType());
|
||||
Assert.assertFalse(vc.isSNP());
|
||||
Assert.assertFalse(vc.isIndel());
|
||||
Assert.assertFalse(vc.isInsertion());
|
||||
|
|
|
|||
|
|
@ -80,16 +80,16 @@ public class VariantJEXLContextUnitTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void before() {
|
||||
del = new Allele("-");
|
||||
delRef = new Allele("-", true);
|
||||
del = Allele.create("-");
|
||||
delRef = Allele.create("-", true);
|
||||
|
||||
A = new Allele("A");
|
||||
Aref = new Allele("A", true);
|
||||
T = new Allele("T");
|
||||
Tref = new Allele("T", true);
|
||||
A = Allele.create("A");
|
||||
Aref = Allele.create("A", true);
|
||||
T = Allele.create("T");
|
||||
Tref = Allele.create("T", true);
|
||||
|
||||
ATC = new Allele("ATC");
|
||||
ATCref = new Allele("ATC", true);
|
||||
ATC = Allele.create("ATC");
|
||||
ATCref = Allele.create("ATC", true);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue