Allowing VCF records without GTs in VCF4.1
This commit is contained in:
parent
a2597e7f00
commit
6007eea3ff
|
|
@ -32,6 +32,7 @@ import org.broad.tribble.index.IndexFactory;
|
|||
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broad.tribble.util.PositionalStream;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
|
@ -300,10 +301,7 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
} else {
|
||||
List<String> genotypeAttributeKeys = new ArrayList<String>();
|
||||
if ( vc.hasGenotypes() ) {
|
||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||
for ( String key : calcVCFGenotypeKeys(vc) ) {
|
||||
genotypeAttributeKeys.add(key);
|
||||
}
|
||||
genotypeAttributeKeys.addAll(calcVCFGenotypeKeys(vc));
|
||||
} else if ( mHeader.hasGenotypingData() ) {
|
||||
// this needs to be done in case all samples are no-calls
|
||||
genotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY);
|
||||
|
|
@ -387,16 +385,22 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
continue;
|
||||
}
|
||||
|
||||
writeAllele(g.getAllele(0), alleleMap);
|
||||
for (int i = 1; i < g.getPloidy(); i++) {
|
||||
mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
|
||||
writeAllele(g.getAllele(i), alleleMap);
|
||||
}
|
||||
|
||||
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||
for ( String key : genotypeFormatKeys ) {
|
||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) )
|
||||
|
||||
if ( key.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
if ( !g.isAvailable() ) {
|
||||
throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record");
|
||||
}
|
||||
|
||||
writeAllele(g.getAllele(0), alleleMap);
|
||||
for (int i = 1; i < g.getPloidy(); i++) {
|
||||
mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
|
||||
writeAllele(g.getAllele(i), alleleMap);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4;
|
||||
|
||||
|
|
@ -488,10 +492,13 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
private static List<String> calcVCFGenotypeKeys(VariantContext vc) {
|
||||
Set<String> keys = new HashSet<String>();
|
||||
|
||||
boolean sawGoodGT = false;
|
||||
boolean sawGoodQual = false;
|
||||
boolean sawGenotypeFilter = false;
|
||||
for ( Genotype g : vc.getGenotypes().values() ) {
|
||||
keys.addAll(g.getAttributes().keySet());
|
||||
if ( g.isAvailable() )
|
||||
sawGoodGT = true;
|
||||
if ( g.hasNegLog10PError() )
|
||||
sawGoodQual = true;
|
||||
if (g.isFiltered() && g.isCalled())
|
||||
|
|
@ -504,7 +511,17 @@ public class StandardVCFWriter implements VCFWriter {
|
|||
if (sawGenotypeFilter)
|
||||
keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
|
||||
|
||||
return ParsingUtils.sortList(new ArrayList<String>(keys));
|
||||
List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));
|
||||
|
||||
// make sure the GT is first
|
||||
if ( sawGoodGT ) {
|
||||
List<String> newList = new ArrayList<String>(sortedList.size()+1);
|
||||
newList.add(VCFConstants.GENOTYPE_KEY);
|
||||
newList.addAll(sortedList);
|
||||
sortedList = newList;
|
||||
}
|
||||
|
||||
return sortedList;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -141,8 +141,6 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
boolean missing = i >= GTValueSplitSize;
|
||||
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
if (i != 0)
|
||||
generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes");
|
||||
genotypeAlleleLocation = i;
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||
|
|
@ -156,12 +154,13 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a gentoype field
|
||||
if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field");
|
||||
// check to make sure we found a genotype field
|
||||
if ( genotypeAlleleLocation < 0 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes");
|
||||
|
||||
// todo -- assuming allele list length in the single digits is bad. Fix me.
|
||||
// Check for > 1 for haploid genotypes
|
||||
boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|';
|
||||
boolean phased = GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -145,8 +145,6 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
|
||||
// todo -- all of these on the fly parsing of the missing value should be static constants
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
if (i != 0)
|
||||
generateException("Saw GT at position " + i + ", but it must be at the first position for genotypes");
|
||||
genotypeAlleleLocation = i;
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
|
||||
GTQual = missing ? parseQual(VCFConstants.MISSING_VALUE_v4) : parseQual(GTValueArray[i]);
|
||||
|
|
@ -160,22 +158,24 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
}
|
||||
}
|
||||
|
||||
// check to make sure we found a gentoype field
|
||||
// TODO -- This is no longer required in v4.1
|
||||
if (genotypeAlleleLocation < 0) generateException("Unable to find required field GT for the record; we don't yet support a missing GT field");
|
||||
// check to make sure we found a genotype field if we are a VCF4.0 file
|
||||
if ( version == VCFHeaderVersion.VCF4_0 && genotypeAlleleLocation == -1 )
|
||||
generateException("Unable to find the GT field for the record; the GT field is required in VCF4.0");
|
||||
if ( genotypeAlleleLocation > 0 )
|
||||
generateException("Saw GT field at position " + genotypeAlleleLocation + ", but it must be at the first position for genotypes when present");
|
||||
|
||||
// todo -- assuming allele list length in the single digits is bad. Fix me.
|
||||
// Check for > 1 for haploid genotypes
|
||||
boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|';
|
||||
List<Allele> GTalleles = (genotypeAlleleLocation == -1 ? null : parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
|
||||
boolean phased = genotypeAlleleLocation != -1 && GTValueArray[genotypeAlleleLocation].indexOf(VCFConstants.PHASED) != -1;
|
||||
|
||||
// add it to the list
|
||||
try {
|
||||
genotypes.put(sampleName, new Genotype(sampleName,
|
||||
parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap),
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
gtAttributes,
|
||||
phased));
|
||||
genotypes.put(sampleName,
|
||||
new Genotype(sampleName,
|
||||
GTalleles,
|
||||
GTQual,
|
||||
genotypeFilters,
|
||||
gtAttributes,
|
||||
phased));
|
||||
} catch (TribbleException e) {
|
||||
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
|||
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -19,12 +20,14 @@ public class Genotype {
|
|||
protected InferredGeneticContext commonInfo;
|
||||
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
|
||||
protected List<Allele> alleles = null; // new ArrayList<Allele>();
|
||||
protected Type type = null;
|
||||
|
||||
protected boolean isPhased = false;
|
||||
private boolean filtersWereAppliedToContext;
|
||||
protected boolean filtersWereAppliedToContext;
|
||||
|
||||
public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
if ( alleles != null )
|
||||
this.alleles = Collections.unmodifiableList(alleles);
|
||||
commonInfo = new InferredGeneticContext(sampleName, negLog10PError, filters, attributes);
|
||||
filtersWereAppliedToContext = filters != null;
|
||||
this.isPhased = isPhased;
|
||||
|
|
@ -66,6 +69,9 @@ public class Genotype {
|
|||
}
|
||||
|
||||
public List<Allele> getAlleles(Allele allele) {
|
||||
if ( getType() == Type.UNAVAILABLE )
|
||||
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||
|
||||
List<Allele> al = new ArrayList<Allele>();
|
||||
for ( Allele a : alleles )
|
||||
if ( a.equals(allele) )
|
||||
|
|
@ -75,6 +81,8 @@ public class Genotype {
|
|||
}
|
||||
|
||||
/**
 * Returns the allele stored at index {@code i} of this genotype's allele list.
 *
 * @param i index into the allele list
 * @return the allele at that index
 * @throws ReviewedStingException if {@code getType()} reports {@code Type.UNAVAILABLE}
 */
public Allele getAllele(int i) {
|
||||
// determineType() reports UNAVAILABLE when the allele list is null, so fail with a clear message instead of dereferencing it below
if ( getType() == Type.UNAVAILABLE )
|
||||
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
|
||||
return alleles.get(i);
|
||||
}
|
||||
|
||||
|
|
@ -89,10 +97,21 @@ public class Genotype {
|
|||
NO_CALL,
|
||||
HOM_REF,
|
||||
HET,
|
||||
HOM_VAR
|
||||
HOM_VAR,
|
||||
UNAVAILABLE
|
||||
}
|
||||
|
||||
/**
 * Returns this genotype's type, computing it on first use.
 *
 * The value produced by {@code determineType()} is stored in the {@code type} field,
 * so subsequent calls return the stored value without recomputing it.
 *
 * @return the stored or newly determined type
 */
public Type getType() {
|
||||
// a null type means it has not been determined yet
if ( type == null ) {
|
||||
type = determineType();
|
||||
}
|
||||
return type;
|
||||
}
|
||||
|
||||
protected Type determineType() {
|
||||
if ( alleles == null )
|
||||
return Type.UNAVAILABLE;
|
||||
|
||||
Allele firstAllele = alleles.get(0);
|
||||
|
||||
if ( firstAllele.isNoCall() ) {
|
||||
|
|
@ -122,7 +141,8 @@ public class Genotype {
|
|||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
|
||||
*/
|
||||
public boolean isNoCall() { return getType() == Type.NO_CALL; }
|
||||
public boolean isCalled() { return getType() != Type.NO_CALL; }
|
||||
public boolean isCalled() { return getType() != Type.NO_CALL && getType() != Type.UNAVAILABLE; }
|
||||
/**
 * @return true unless this genotype's type is UNAVAILABLE (the type determineType() reports when the allele list is null)
 */
public boolean isAvailable() { return getType() != Type.UNAVAILABLE; }
|
||||
|
||||
//
|
||||
// Useful methods for getting genotype likelihoods for a genotype object, if present
|
||||
|
|
@ -157,8 +177,8 @@ public class Genotype {
|
|||
}
|
||||
|
||||
public void validate() {
|
||||
if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles");
|
||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles");
|
||||
if ( alleles == null ) return;
|
||||
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0");
|
||||
|
||||
int nNoCalls = 0;
|
||||
for ( Allele allele : alleles ) {
|
||||
|
|
@ -175,6 +195,9 @@ public class Genotype {
|
|||
}
|
||||
|
||||
public String getGenotypeString(boolean ignoreRefState) {
|
||||
if ( alleles == null )
|
||||
return null;
|
||||
|
||||
// Notes:
|
||||
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
|
||||
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
|
||||
|
|
|
|||
|
|
@ -1206,9 +1206,11 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
|
||||
if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName());
|
||||
|
||||
for ( Allele gAllele : g.getAlleles() ) {
|
||||
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
|
||||
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
|
||||
if ( g.isAvailable() ) {
|
||||
for ( Allele gAllele : g.getAlleles() ) {
|
||||
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
|
||||
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue