Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
c94c8a9c09
|
|
@ -116,6 +116,7 @@ public class VCFDiffableReader implements DiffableReader {
|
||||||
if ( g.hasDP() ) gRoot.add("DP", g.getDP() );
|
if ( g.hasDP() ) gRoot.add("DP", g.getDP() );
|
||||||
if ( g.hasAD() ) gRoot.add("AD", Utils.join(",", g.getAD()));
|
if ( g.hasAD() ) gRoot.add("AD", Utils.join(",", g.getAD()));
|
||||||
if ( g.hasPL() ) gRoot.add("PL", Utils.join(",", g.getPL()));
|
if ( g.hasPL() ) gRoot.add("PL", Utils.join(",", g.getPL()));
|
||||||
|
if ( g.getFilters() != null ) gRoot.add("FT", g.getFilters());
|
||||||
|
|
||||||
for (Map.Entry<String, Object> attribute : g.getExtendedAttributes().entrySet()) {
|
for (Map.Entry<String, Object> attribute : g.getExtendedAttributes().entrySet()) {
|
||||||
if ( ! attribute.getKey().startsWith("_") )
|
if ( ! attribute.getKey().startsWith("_") )
|
||||||
|
|
|
||||||
|
|
@ -297,7 +297,8 @@ public class VariantFiltrationWalker extends RodWalker<Integer, Integer> {
|
||||||
// for each genotype, check filters then create a new object
|
// for each genotype, check filters then create a new object
|
||||||
for ( final Genotype g : vc.getGenotypes() ) {
|
for ( final Genotype g : vc.getGenotypes() ) {
|
||||||
if ( g.isCalled() ) {
|
if ( g.isCalled() ) {
|
||||||
List<String> filters = new ArrayList<String>(g.getFilters());
|
final List<String> filters = new ArrayList<String>();
|
||||||
|
if ( g.isFiltered() ) filters.add(g.getFilters());
|
||||||
|
|
||||||
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
|
for ( VariantContextUtils.JexlVCMatchExp exp : genotypeFilterExps ) {
|
||||||
if ( VariantContextUtils.match(vc, g, exp) )
|
if ( VariantContextUtils.match(vc, g, exp) )
|
||||||
|
|
|
||||||
|
|
@ -186,7 +186,10 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
final int hsize = ref.getWindow().size() - Math.abs(eventLength) - 1;
|
final int hsize = ref.getWindow().size() - Math.abs(eventLength) - 1;
|
||||||
final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
|
final int numPrefBases = ref.getLocus().getStart() - ref.getWindow().getStart() + 1;
|
||||||
|
|
||||||
haplotypeMap.putAll(Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
if (hsize <= 0) // protect against event lengths larger than ref window sizes
|
||||||
|
haplotypeMap.clear();
|
||||||
|
else
|
||||||
|
haplotypeMap.putAll(Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
||||||
ref, hsize, numPrefBases));
|
ref, hsize, numPrefBases));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFAlleleClipper;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -188,7 +189,7 @@ public class UnifiedGenotyperEngine {
|
||||||
else {
|
else {
|
||||||
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model);
|
||||||
if ( vc != null )
|
if ( vc != null )
|
||||||
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model));
|
results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -309,6 +310,22 @@ public class UnifiedGenotyperEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||||
|
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main entry function to calculate genotypes of a given VC with corresponding GL's
|
||||||
|
* @param tracker Tracker
|
||||||
|
* @param refContext Reference context
|
||||||
|
* @param rawContext Raw context
|
||||||
|
* @param stratifiedContexts Stratified alignment contexts
|
||||||
|
* @param vc Input VC
|
||||||
|
* @param model GL calculation model
|
||||||
|
* @param inheritAttributesFromInputVC Output VC will contain attributes inherited from input vc
|
||||||
|
* @return VC with assigned genotypes
|
||||||
|
*/
|
||||||
|
public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
|
||||||
|
final boolean inheritAttributesFromInputVC) {
|
||||||
|
|
||||||
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||||
|
|
||||||
|
|
@ -408,6 +425,9 @@ public class UnifiedGenotyperEngine {
|
||||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||||
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
final HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||||
|
|
||||||
|
// inherit attributed from input vc if requested
|
||||||
|
if (inheritAttributesFromInputVC)
|
||||||
|
attributes.putAll(vc.getAttributes());
|
||||||
// if the site was downsampled, record that fact
|
// if the site was downsampled, record that fact
|
||||||
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
|
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
|
||||||
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
|
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
|
||||||
|
|
@ -474,7 +494,7 @@ public class UnifiedGenotyperEngine {
|
||||||
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
// if we are subsetting alleles (either because there were too many or because some were not polymorphic)
|
||||||
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
// then we may need to trim the alleles (because the original VariantContext may have had to pad at the end).
|
||||||
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
|
if ( myAlleles.size() != vc.getAlleles().size() && !limitedContext ) // TODO - this function doesn't work with mixed records or records that started as mixed and then became non-mixed
|
||||||
vcCall = VariantContextUtils.reverseTrimAlleles(vcCall);
|
vcCall = VCFAlleleClipper.reverseTrimAlleles(vcCall);
|
||||||
|
|
||||||
if ( annotationEngine != null && !limitedContext ) {
|
if ( annotationEngine != null && !limitedContext ) {
|
||||||
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
// Note: we want to use the *unfiltered* and *unBAQed* context for the annotations
|
||||||
|
|
@ -622,6 +642,9 @@ public class UnifiedGenotyperEngine {
|
||||||
final AlignmentContext rawContext) {
|
final AlignmentContext rawContext) {
|
||||||
|
|
||||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
|
final List<GenotypeLikelihoodsCalculationModel.Model> models = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
|
||||||
|
String modelPrefix = "";
|
||||||
|
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
|
||||||
|
modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
|
||||||
|
|
||||||
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
||||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
|
|
@ -631,24 +654,24 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
if ( vcInput.isSNP() ) {
|
if ( vcInput.isSNP() ) {
|
||||||
// ignore SNPs if the user chose INDEL mode only
|
// ignore SNPs if the user chose INDEL mode only
|
||||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
|
||||||
models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
|
||||||
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
|
else if ( UAC.GLmodel.name().toUpperCase().contains("SNP") )
|
||||||
models.add(UAC.GLmodel);
|
models.add(UAC.GLmodel);
|
||||||
}
|
}
|
||||||
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
|
else if ( vcInput.isIndel() || vcInput.isMixed() ) {
|
||||||
// ignore INDELs if the user chose SNP mode only
|
// ignore INDELs if the user chose SNP mode only
|
||||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH )
|
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
|
||||||
models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
|
||||||
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
else if (UAC.GLmodel.name().toUpperCase().contains("INDEL"))
|
||||||
models.add(UAC.GLmodel);
|
models.add(UAC.GLmodel);
|
||||||
}
|
}
|
||||||
// No support for other types yet
|
// No support for other types yet
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.BOTH ) {
|
if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {
|
||||||
models.add(GenotypeLikelihoodsCalculationModel.Model.SNP);
|
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
|
||||||
models.add(GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
models.add(UAC.GLmodel);
|
models.add(UAC.GLmodel);
|
||||||
|
|
|
||||||
|
|
@ -298,7 +298,7 @@ public class VariantDataManager {
|
||||||
attributes.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", datum.lod));
|
attributes.put(VariantRecalibrator.VQS_LOD_KEY, String.format("%.4f", datum.lod));
|
||||||
attributes.put(VariantRecalibrator.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));
|
attributes.put(VariantRecalibrator.CULPRIT_KEY, (datum.worstAnnotation != -1 ? annotationKeys.get(datum.worstAnnotation) : "NULL"));
|
||||||
|
|
||||||
VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStart(), alleles).attributes(attributes);
|
VariantContextBuilder builder = new VariantContextBuilder("VQSR", datum.loc.getContig(), datum.loc.getStart(), datum.loc.getStop(), alleles).attributes(attributes);
|
||||||
recalWriter.add(builder.make());
|
recalWriter.add(builder.make());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -129,7 +129,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
||||||
.attribute("OriginalStart", fromInterval.getStart()).make();
|
.attribute("OriginalStart", fromInterval.getStart()).make();
|
||||||
}
|
}
|
||||||
|
|
||||||
VariantContext newVC = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
|
VariantContext newVC = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
||||||
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
if ( originalVC.isSNP() && originalVC.isBiallelic() && VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
||||||
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
||||||
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
|
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,8 @@ import java.util.*;
|
||||||
*/
|
*/
|
||||||
public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDependentFeatureCodec {
|
public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDependentFeatureCodec {
|
||||||
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
||||||
|
private final static boolean FORBID_SYMBOLICS = false;
|
||||||
|
|
||||||
private VCFHeader header = null;
|
private VCFHeader header = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -270,7 +272,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
||||||
" samples in header but have a record with " + nSamples + " samples");
|
" samples in header but have a record with " + nSamples + " samples");
|
||||||
|
|
||||||
decodeID(builder);
|
decodeID(builder);
|
||||||
final ArrayList<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
|
final List<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
|
||||||
decodeFilter(builder);
|
decodeFilter(builder);
|
||||||
decodeInfo(builder, nInfo);
|
decodeInfo(builder, nInfo);
|
||||||
|
|
||||||
|
|
@ -283,9 +285,9 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
||||||
protected final static class SitesInfoForDecoding {
|
protected final static class SitesInfoForDecoding {
|
||||||
final int nFormatFields;
|
final int nFormatFields;
|
||||||
final int nSamples;
|
final int nSamples;
|
||||||
final ArrayList<Allele> alleles;
|
final List<Allele> alleles;
|
||||||
|
|
||||||
private SitesInfoForDecoding(final int nFormatFields, final int nSamples, final ArrayList<Allele> alleles) {
|
private SitesInfoForDecoding(final int nFormatFields, final int nSamples, final List<Allele> alleles) {
|
||||||
this.nFormatFields = nFormatFields;
|
this.nFormatFields = nFormatFields;
|
||||||
this.nSamples = nSamples;
|
this.nSamples = nSamples;
|
||||||
this.alleles = alleles;
|
this.alleles = alleles;
|
||||||
|
|
@ -325,13 +327,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
||||||
* @param unclippedAlleles
|
* @param unclippedAlleles
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
protected static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
@Requires({"position > 0", "ref != null && ref.length() > 0", "! unclippedAlleles.isEmpty()"})
|
||||||
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
@Ensures("result.size() == unclippedAlleles.size()")
|
||||||
final ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
protected List<Allele> clipAllelesIfNecessary(final int position,
|
||||||
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
|
final String ref,
|
||||||
return clippedAlleles;
|
final List<Allele> unclippedAlleles) {
|
||||||
} else
|
// the last argument of 1 allows us to safely ignore the end, because we are
|
||||||
return unclippedAlleles;
|
// ultimately going to use the end in the record itself
|
||||||
|
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(position, ref, unclippedAlleles, 1);
|
||||||
|
if ( clipped.getError() != null ) error(clipped.getError());
|
||||||
|
return clipped.getClippedAlleles();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -342,9 +347,9 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
||||||
* @return the alleles
|
* @return the alleles
|
||||||
*/
|
*/
|
||||||
@Requires("nAlleles > 0")
|
@Requires("nAlleles > 0")
|
||||||
private ArrayList<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) {
|
private List<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) {
|
||||||
// TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
|
// TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
|
||||||
ArrayList<Allele> alleles = new ArrayList<Allele>(nAlleles);
|
List<Allele> alleles = new ArrayList<Allele>(nAlleles);
|
||||||
String ref = null;
|
String ref = null;
|
||||||
|
|
||||||
for ( int i = 0; i < nAlleles; i++ ) {
|
for ( int i = 0; i < nAlleles; i++ ) {
|
||||||
|
|
@ -356,7 +361,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
||||||
|
|
||||||
alleles.add(allele);
|
alleles.add(allele);
|
||||||
|
|
||||||
if ( allele.isSymbolic() )
|
if ( FORBID_SYMBOLICS && allele.isSymbolic() )
|
||||||
throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles");
|
throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles");
|
||||||
}
|
}
|
||||||
assert ref != null;
|
assert ref != null;
|
||||||
|
|
|
||||||
|
|
@ -276,9 +276,8 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
||||||
if ( value != null ) { // don't add missing values
|
assert value == null || value instanceof String;
|
||||||
gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List<String>)value);
|
gb.filter((String)value);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -44,13 +44,13 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
||||||
// initialized when this lazy decoder is created, as we know all of this from the BCF2Codec
|
// initialized when this lazy decoder is created, as we know all of this from the BCF2Codec
|
||||||
// and its stored here again for code cleanliness
|
// and its stored here again for code cleanliness
|
||||||
private final BCF2Codec codec;
|
private final BCF2Codec codec;
|
||||||
private final ArrayList<Allele> siteAlleles;
|
private final List<Allele> siteAlleles;
|
||||||
private final int nSamples;
|
private final int nSamples;
|
||||||
private final int nFields;
|
private final int nFields;
|
||||||
private final GenotypeBuilder[] builders;
|
private final GenotypeBuilder[] builders;
|
||||||
|
|
||||||
@Requires("codec.getHeader().getNGenotypeSamples() == builders.length")
|
@Requires("codec.getHeader().getNGenotypeSamples() == builders.length")
|
||||||
BCF2LazyGenotypesDecoder(final BCF2Codec codec, final ArrayList<Allele> alleles, final int nSamples,
|
BCF2LazyGenotypesDecoder(final BCF2Codec codec, final List<Allele> alleles, final int nSamples,
|
||||||
final int nFields, final GenotypeBuilder[] builders) {
|
final int nFields, final GenotypeBuilder[] builders) {
|
||||||
this.codec = codec;
|
this.codec = codec;
|
||||||
this.siteAlleles = alleles;
|
this.siteAlleles = alleles;
|
||||||
|
|
|
||||||
|
|
@ -186,81 +186,19 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
* @return a feature, (not guaranteed complete) that has the correct start and stop
|
* @return a feature, (not guaranteed complete) that has the correct start and stop
|
||||||
*/
|
*/
|
||||||
public Feature decodeLoc(String line) {
|
public Feature decodeLoc(String line) {
|
||||||
|
return decodeLine(line, false);
|
||||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
|
||||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
|
||||||
|
|
||||||
// our header cannot be null, we need the genotype sample names and counts
|
|
||||||
if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");
|
|
||||||
|
|
||||||
final int nParts = ParsingUtils.split(line, locParts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
|
||||||
|
|
||||||
if ( nParts != 6 )
|
|
||||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line, lineNo);
|
|
||||||
|
|
||||||
// get our alleles (because the end position depends on them)
|
|
||||||
final String ref = getCachedString(locParts[3].toUpperCase());
|
|
||||||
final String alts = getCachedString(locParts[4].toUpperCase());
|
|
||||||
final List<Allele> alleles = parseAlleles(ref, alts, lineNo);
|
|
||||||
|
|
||||||
// find out our location
|
|
||||||
int start = 0;
|
|
||||||
try {
|
|
||||||
start = Integer.valueOf(locParts[1]);
|
|
||||||
} catch (Exception e) {
|
|
||||||
generateException("the value in the POS field must be an integer but it was " + locParts[1], lineNo);
|
|
||||||
}
|
|
||||||
int stop = start;
|
|
||||||
|
|
||||||
// ref alleles don't need to be single bases for monomorphic sites
|
|
||||||
if ( alleles.size() == 1 ) {
|
|
||||||
stop = start + alleles.get(0).length() - 1;
|
|
||||||
}
|
|
||||||
// we need to parse the INFO field to check for an END tag if it's a symbolic allele
|
|
||||||
else if ( alleles.size() == 2 && alleles.get(1).isSymbolic() ) {
|
|
||||||
final String[] extraParts = new String[4];
|
|
||||||
final int nExtraParts = ParsingUtils.split(locParts[5], extraParts, VCFConstants.FIELD_SEPARATOR_CHAR, true);
|
|
||||||
if ( nExtraParts < 3 )
|
|
||||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line, lineNo);
|
|
||||||
|
|
||||||
final Map<String, Object> attrs = parseInfo(extraParts[2]);
|
|
||||||
try {
|
|
||||||
stop = attrs.containsKey(VCFConstants.END_KEY) ? Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString()) : start;
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new UserException.MalformedVCF("the END value in the INFO field is not valid for line " + line, lineNo);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// handle multi-positional events
|
|
||||||
else if ( !isSingleNucleotideEvent(alleles) ) {
|
|
||||||
stop = clipAlleles(start, ref, alleles, null, lineNo);
|
|
||||||
}
|
|
||||||
|
|
||||||
return new VCFLocFeature(locParts[0], start, stop);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static class VCFLocFeature implements Feature {
|
|
||||||
|
|
||||||
final String chr;
|
|
||||||
final int start, stop;
|
|
||||||
|
|
||||||
private VCFLocFeature(String chr, int start, int stop) {
|
|
||||||
this.chr = chr;
|
|
||||||
this.start = start;
|
|
||||||
this.stop = stop;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getChr() { return chr; }
|
|
||||||
public int getStart() { return start; }
|
|
||||||
public int getEnd() { return stop; }
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* decode the line into a feature (VariantContext)
|
* decode the line into a feature (VariantContext)
|
||||||
* @param line the line
|
* @param line the line
|
||||||
* @return a VariantContext
|
* @return a VariantContext
|
||||||
*/
|
*/
|
||||||
public VariantContext decode(String line) {
|
public VariantContext decode(String line) {
|
||||||
|
return decodeLine(line, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final VariantContext decodeLine(final String line, final boolean includeGenotypes) {
|
||||||
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
// the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
|
||||||
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
|
||||||
|
|
||||||
|
|
@ -278,15 +216,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
|
||||||
" tokens, and saw " + nParts + " )", lineNo);
|
" tokens, and saw " + nParts + " )", lineNo);
|
||||||
|
|
||||||
return parseVCFLine(parts);
|
return parseVCFLine(parts, includeGenotypes);
|
||||||
}
|
|
||||||
|
|
||||||
protected void generateException(String message) {
|
|
||||||
throw new UserException.MalformedVCF(message, lineNo);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected static void generateException(String message, int lineNo) {
|
|
||||||
throw new UserException.MalformedVCF(message, lineNo);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -295,7 +225,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
* @param parts the parts split up
|
* @param parts the parts split up
|
||||||
* @return a variant context object
|
* @return a variant context object
|
||||||
*/
|
*/
|
||||||
private VariantContext parseVCFLine(String[] parts) {
|
private VariantContext parseVCFLine(final String[] parts, final boolean includeGenotypes) {
|
||||||
VariantContextBuilder builder = new VariantContextBuilder();
|
VariantContextBuilder builder = new VariantContextBuilder();
|
||||||
builder.source(getName());
|
builder.source(getName());
|
||||||
|
|
||||||
|
|
@ -317,8 +247,8 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
else
|
else
|
||||||
builder.id(parts[2]);
|
builder.id(parts[2]);
|
||||||
|
|
||||||
String ref = getCachedString(parts[3].toUpperCase());
|
final String ref = getCachedString(parts[3].toUpperCase());
|
||||||
String alts = getCachedString(parts[4].toUpperCase());
|
final String alts = getCachedString(parts[4].toUpperCase());
|
||||||
builder.log10PError(parseQual(parts[5]));
|
builder.log10PError(parseQual(parts[5]));
|
||||||
|
|
||||||
final List<String> filters = parseFilters(getCachedString(parts[6]));
|
final List<String> filters = parseFilters(getCachedString(parts[6]));
|
||||||
|
|
@ -327,31 +257,11 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
builder.attributes(attrs);
|
builder.attributes(attrs);
|
||||||
|
|
||||||
// get our alleles, filters, and setup an attribute map
|
// get our alleles, filters, and setup an attribute map
|
||||||
List<Allele> alleles = parseAlleles(ref, alts, lineNo);
|
final List<Allele> rawAlleles = parseAlleles(ref, alts, lineNo);
|
||||||
|
final List<Allele> alleles = updateBuilderAllelesAndStop(builder, ref, pos, rawAlleles, attrs);
|
||||||
// find out our current location, and clip the alleles down to their minimum length
|
|
||||||
int stop = pos;
|
|
||||||
// ref alleles don't need to be single bases for monomorphic sites
|
|
||||||
if ( alleles.size() == 1 ) {
|
|
||||||
stop = pos + alleles.get(0).length() - 1;
|
|
||||||
}
|
|
||||||
// we need to parse the INFO field to check for an END tag if it's a symbolic allele
|
|
||||||
else if ( alleles.size() == 2 && alleles.get(1).isSymbolic() && attrs.containsKey(VCFConstants.END_KEY) ) {
|
|
||||||
try {
|
|
||||||
stop = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString());
|
|
||||||
} catch (Exception e) {
|
|
||||||
generateException("the END value in the INFO field is not valid");
|
|
||||||
}
|
|
||||||
} else if ( !isSingleNucleotideEvent(alleles) ) {
|
|
||||||
ArrayList<Allele> newAlleles = new ArrayList<Allele>();
|
|
||||||
stop = clipAlleles(pos, ref, alleles, newAlleles, lineNo);
|
|
||||||
alleles = newAlleles;
|
|
||||||
}
|
|
||||||
builder.stop(stop);
|
|
||||||
builder.alleles(alleles);
|
|
||||||
|
|
||||||
// do we have genotyping data
|
// do we have genotyping data
|
||||||
if (parts.length > NUM_STANDARD_FIELDS) {
|
if (parts.length > NUM_STANDARD_FIELDS && includeGenotypes) {
|
||||||
final LazyGenotypesContext.LazyParser lazyParser = new LazyVCFGenotypesParser(alleles, chr, pos);
|
final LazyGenotypesContext.LazyParser lazyParser = new LazyVCFGenotypesParser(alleles, chr, pos);
|
||||||
final int nGenotypes = header.getNGenotypeSamples();
|
final int nGenotypes = header.getNGenotypeSamples();
|
||||||
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, parts[8], nGenotypes);
|
LazyGenotypesContext lazy = new LazyGenotypesContext(lazyParser, parts[8], nGenotypes);
|
||||||
|
|
@ -374,6 +284,31 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
return vc;
|
return vc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final List<Allele> updateBuilderAllelesAndStop(final VariantContextBuilder builder,
|
||||||
|
final String ref,
|
||||||
|
final int pos,
|
||||||
|
final List<Allele> rawAlleles,
|
||||||
|
final Map<String, Object> attrs) {
|
||||||
|
int endForSymbolicAlleles = pos; // by default we use the pos
|
||||||
|
if ( attrs.containsKey(VCFConstants.END_KEY) ) {
|
||||||
|
// update stop with the end key if provided
|
||||||
|
try {
|
||||||
|
endForSymbolicAlleles = Integer.valueOf(attrs.get(VCFConstants.END_KEY).toString());
|
||||||
|
} catch (Exception e) {
|
||||||
|
generateException("the END value in the INFO field is not valid");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// find out our current location, and clip the alleles down to their minimum length
|
||||||
|
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(pos, ref, rawAlleles, endForSymbolicAlleles);
|
||||||
|
if ( clipped.getError() != null )
|
||||||
|
generateException(clipped.getError(), lineNo);
|
||||||
|
|
||||||
|
builder.stop(clipped.getStop());
|
||||||
|
builder.alleles(clipped.getClippedAlleles());
|
||||||
|
return clipped.getClippedAlleles();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the name of this codec
|
* get the name of this codec
|
||||||
* @return our set name
|
* @return our set name
|
||||||
|
|
@ -613,102 +548,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
alleles.add(allele);
|
alleles.add(allele);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean isSingleNucleotideEvent(List<Allele> alleles) {
|
|
||||||
for ( Allele a : alleles ) {
|
|
||||||
if ( a.length() != 1 )
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int computeForwardClipping(final List<Allele> unclippedAlleles, final byte ref0) {
|
|
||||||
boolean clipping = true;
|
|
||||||
int symbolicAlleleCount = 0;
|
|
||||||
|
|
||||||
for ( Allele a : unclippedAlleles ) {
|
|
||||||
if ( a.isSymbolic() ) {
|
|
||||||
symbolicAlleleCount++;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
|
|
||||||
clipping = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// don't clip if all alleles are symbolic
|
|
||||||
return (clipping && symbolicAlleleCount != unclippedAlleles.size()) ? 1 : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int computeReverseClipping(final List<Allele> unclippedAlleles, final byte[] ref, final int forwardClipping, final boolean allowFullClip, final int lineNo) {
|
|
||||||
int clipping = 0;
|
|
||||||
boolean stillClipping = true;
|
|
||||||
|
|
||||||
while ( stillClipping ) {
|
|
||||||
for ( final Allele a : unclippedAlleles ) {
|
|
||||||
if ( a.isSymbolic() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
|
|
||||||
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
|
|
||||||
if ( a.length() - clipping == 0 )
|
|
||||||
return clipping - (allowFullClip ? 0 : 1);
|
|
||||||
|
|
||||||
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
|
|
||||||
stillClipping = false;
|
|
||||||
}
|
|
||||||
else if ( ref.length == clipping ) {
|
|
||||||
if ( allowFullClip )
|
|
||||||
stillClipping = false;
|
|
||||||
else
|
|
||||||
generateException("bad alleles encountered", lineNo);
|
|
||||||
}
|
|
||||||
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
|
|
||||||
stillClipping = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ( stillClipping )
|
|
||||||
clipping++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return clipping;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* clip the alleles, based on the reference
|
|
||||||
*
|
|
||||||
* @param position the unadjusted start position (pre-clipping)
|
|
||||||
* @param ref the reference string
|
|
||||||
* @param unclippedAlleles the list of unclipped alleles
|
|
||||||
* @param clippedAlleles output list of clipped alleles
|
|
||||||
* @param lineNo the current line number in the file
|
|
||||||
* @return the new reference end position of this event
|
|
||||||
*/
|
|
||||||
public static int clipAlleles(int position, String ref, List<Allele> unclippedAlleles, List<Allele> clippedAlleles, int lineNo) {
|
|
||||||
|
|
||||||
int forwardClipping = computeForwardClipping(unclippedAlleles, (byte)ref.charAt(0));
|
|
||||||
int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false, lineNo);
|
|
||||||
|
|
||||||
if ( clippedAlleles != null ) {
|
|
||||||
for ( Allele a : unclippedAlleles ) {
|
|
||||||
if ( a.isSymbolic() ) {
|
|
||||||
clippedAlleles.add(a);
|
|
||||||
} else {
|
|
||||||
final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length-reverseClipping);
|
|
||||||
if ( !Allele.acceptableAlleleBases(allele) )
|
|
||||||
generateException("Unparsable vcf record with bad allele [" + allele + "]", lineNo);
|
|
||||||
clippedAlleles.add(Allele.create(allele, a.isReference()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// the new reference length
|
|
||||||
int refLength = ref.length() - reverseClipping;
|
|
||||||
|
|
||||||
return position+Math.max(refLength - 1,0);
|
|
||||||
}
|
|
||||||
|
|
||||||
public final static boolean canDecodeFile(final String potentialInput, final String MAGIC_HEADER_LINE) {
|
public final static boolean canDecodeFile(final String potentialInput, final String MAGIC_HEADER_LINE) {
|
||||||
try {
|
try {
|
||||||
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
|
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
|
||||||
|
|
@ -857,4 +696,13 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
||||||
public final void disableOnTheFlyModifications() {
|
public final void disableOnTheFlyModifications() {
|
||||||
doOnTheFlyModifications = false;
|
doOnTheFlyModifications = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
protected void generateException(String message) {
|
||||||
|
throw new UserException.MalformedVCF(message, lineNo);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void generateException(String message, int lineNo) {
|
||||||
|
throw new UserException.MalformedVCF(message, lineNo);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,434 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Invariant;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* All of the gross allele clipping and padding routines in one place
|
||||||
|
*
|
||||||
|
* Having attempted to understand / fix / document this code myself
|
||||||
|
* I can only conclude that this entire approach needs to be rethought. This
|
||||||
|
* code just doesn't work robustly with symbolic alleles, with multiple alleles,
|
||||||
|
* requires a special "reference base for indels" stored in the VariantContext
|
||||||
|
* whose correctness isn't enforced, and overall has strange special cases
|
||||||
|
* all over the place.
|
||||||
|
*
|
||||||
|
* The reason this code is so complex is due to symbolics and multi-alleleic
|
||||||
|
* variation, which frequently occur when combining variants from multiple
|
||||||
|
* VCF files.
|
||||||
|
*
|
||||||
|
* TODO rethink this class, make it clean, and make it easy to create, mix, and write out alleles
|
||||||
|
* TODO this code doesn't work with reverse clipped alleles (ATA / GTTA -> AT / GT)
|
||||||
|
*
|
||||||
|
* @author Mark DePristo
|
||||||
|
* @since 6/12
|
||||||
|
*/
|
||||||
|
public final class VCFAlleleClipper {
|
||||||
|
private VCFAlleleClipper() { }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine whether we should clip off the first base of all unclippped alleles or not
|
||||||
|
*
|
||||||
|
* Returns true if all of the alleles in unclippedAlleles share a common first base with
|
||||||
|
* ref0. Ref0 should be the first base of the reference allele UnclippedAlleles may
|
||||||
|
* contain the reference allele itself, or just the alternate alleles, it doesn't matter.
|
||||||
|
*
|
||||||
|
* The algorithm returns true if the first base should be clipped off, or false otherwise
|
||||||
|
*
|
||||||
|
* This algorithm works even in the presence of symbolic alleles, logically ignoring these
|
||||||
|
* values. It
|
||||||
|
*
|
||||||
|
* @param unclippedAlleles list of unclipped alleles to assay
|
||||||
|
* @param ref0 the first base of the reference allele
|
||||||
|
* @return true if we should clip the first base of unclippedAlleles
|
||||||
|
*/
|
||||||
|
@Requires("unclippedAlleles != null")
|
||||||
|
public static boolean shouldClipFirstBaseP(final List<Allele> unclippedAlleles,
|
||||||
|
final byte ref0) {
|
||||||
|
boolean allSymbolicAlt = true;
|
||||||
|
|
||||||
|
for ( final Allele a : unclippedAlleles ) {
|
||||||
|
if ( a.isSymbolic() ) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// already know we aren't symbolic, so we only need to decide if we have only seen a ref
|
||||||
|
if ( ! a.isReference() )
|
||||||
|
allSymbolicAlt = false;
|
||||||
|
|
||||||
|
if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// to reach here all alleles are consistent with clipping the first base matching ref0
|
||||||
|
// but we don't clip if all ALT alleles are symbolic
|
||||||
|
return ! allSymbolicAlt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static int computeReverseClipping(final List<Allele> unclippedAlleles,
|
||||||
|
final byte[] ref,
|
||||||
|
final int forwardClipping,
|
||||||
|
final boolean allowFullClip) {
|
||||||
|
int clipping = 0;
|
||||||
|
boolean stillClipping = true;
|
||||||
|
|
||||||
|
while ( stillClipping ) {
|
||||||
|
for ( final Allele a : unclippedAlleles ) {
|
||||||
|
if ( a.isSymbolic() )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
|
||||||
|
// position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
|
||||||
|
if ( a.length() - clipping == 0 )
|
||||||
|
return clipping - (allowFullClip ? 0 : 1);
|
||||||
|
|
||||||
|
if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 ) {
|
||||||
|
stillClipping = false;
|
||||||
|
}
|
||||||
|
else if ( ref.length == clipping ) {
|
||||||
|
if ( allowFullClip )
|
||||||
|
stillClipping = false;
|
||||||
|
else
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
else if ( a.getBases()[a.length()-clipping-1] != ref[ref.length-clipping-1] ) {
|
||||||
|
stillClipping = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( stillClipping )
|
||||||
|
clipping++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return clipping;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Are the alleles describing a polymorphism substitution one base for another?
|
||||||
|
*
|
||||||
|
* @param alleles a list of alleles, must not be empty
|
||||||
|
* @return Return true if the length of any allele in alleles isn't 1
|
||||||
|
*/
|
||||||
|
@Requires("!alleles.isEmpty()")
|
||||||
|
private static boolean isSingleNucleotideEvent(final List<Allele> alleles) {
|
||||||
|
for ( final Allele a : alleles ) {
|
||||||
|
if ( a.length() != 1 )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* clip the alleles, based on the reference, returning a ClippedAlleles object describing what happened
|
||||||
|
*
|
||||||
|
* The ClippedAlleles object contains the implied stop position of the alleles, given the provided start
|
||||||
|
* position, after clipping. It also contains the list of alleles, in the same order as the provided
|
||||||
|
* unclipped ones, that are the fully clipped version of the input alleles. If an error occurs
|
||||||
|
* during this option the getError() function returns a string describing the problem (for use in parsers).
|
||||||
|
*
|
||||||
|
* The basic operation are:
|
||||||
|
*
|
||||||
|
* single allele
|
||||||
|
* => stop == start and clipped == unclipped
|
||||||
|
* any number of single nucleotide events
|
||||||
|
* => stop == start and clipped == unclipped
|
||||||
|
* two alleles, second being symbolic
|
||||||
|
* => stop == start and clipped == unclipped
|
||||||
|
* Note in this case that the STOP should be computed by other means (from END in VCF, for example)
|
||||||
|
* Note that if there's more than two alleles and the second is a symbolic the code produces an error
|
||||||
|
* Any other case:
|
||||||
|
* The alleles are trimmed of any sequence shared at the end of the alleles. If N bases
|
||||||
|
* are common then the alleles will all be at least N bases shorter.
|
||||||
|
* The stop position returned is the start position + the length of the
|
||||||
|
* reverse trimmed only reference allele - 1.
|
||||||
|
* If the alleles all share a single common starting sequence (just one base is considered)
|
||||||
|
* then the alleles have this leading common base removed as well.
|
||||||
|
*
|
||||||
|
* TODO This code is gross and brittle and needs to be rethought from scratch
|
||||||
|
*
|
||||||
|
* @param start the unadjusted start position (pre-clipping)
|
||||||
|
* @param ref the reference string
|
||||||
|
* @param unclippedAlleles the list of unclipped alleles, including the reference allele
|
||||||
|
* @return the new reference end position of this event
|
||||||
|
*/
|
||||||
|
@Requires({"start > 0", "ref != null && ref.length() > 0", "!unclippedAlleles.isEmpty()"})
|
||||||
|
@Ensures("result != null")
|
||||||
|
public static ClippedAlleles clipAlleles(final int start,
|
||||||
|
final String ref,
|
||||||
|
final List<Allele> unclippedAlleles,
|
||||||
|
final int endForSymbolicAllele ) {
|
||||||
|
// no variation or single nucleotide events are by definition fully clipped
|
||||||
|
if ( unclippedAlleles.size() == 1 || isSingleNucleotideEvent(unclippedAlleles) )
|
||||||
|
return new ClippedAlleles(start, unclippedAlleles, null);
|
||||||
|
|
||||||
|
// we've got to sort out the clipping by looking at the alleles themselves
|
||||||
|
final byte firstRefBase = (byte) ref.charAt(0);
|
||||||
|
final boolean firstBaseIsClipped = shouldClipFirstBaseP(unclippedAlleles, firstRefBase);
|
||||||
|
final int forwardClipping = firstBaseIsClipped ? 1 : 0;
|
||||||
|
final int reverseClipping = computeReverseClipping(unclippedAlleles, ref.getBytes(), forwardClipping, false);
|
||||||
|
final boolean needsClipping = forwardClipping > 0 || reverseClipping > 0;
|
||||||
|
|
||||||
|
if ( reverseClipping == -1 )
|
||||||
|
return new ClippedAlleles("computeReverseClipping failed due to bad alleles");
|
||||||
|
|
||||||
|
boolean sawSymbolic = false;
|
||||||
|
List<Allele> clippedAlleles;
|
||||||
|
if ( ! needsClipping ) {
|
||||||
|
// there's nothing to clip, so clippedAlleles are the original alleles
|
||||||
|
clippedAlleles = unclippedAlleles;
|
||||||
|
} else {
|
||||||
|
clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||||
|
for ( final Allele a : unclippedAlleles ) {
|
||||||
|
if ( a.isSymbolic() ) {
|
||||||
|
sawSymbolic = true;
|
||||||
|
clippedAlleles.add(a);
|
||||||
|
} else {
|
||||||
|
final byte[] allele = Arrays.copyOfRange(a.getBases(), forwardClipping, a.getBases().length - reverseClipping);
|
||||||
|
if ( !Allele.acceptableAlleleBases(allele) )
|
||||||
|
return new ClippedAlleles("Unparsable vcf record with bad allele [" + allele + "]");
|
||||||
|
clippedAlleles.add(Allele.create(allele, a.isReference()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int stop = VariantContextUtils.computeEndFromAlleles(clippedAlleles, start, endForSymbolicAllele);
|
||||||
|
|
||||||
|
// TODO
|
||||||
|
// TODO
|
||||||
|
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
|
||||||
|
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
|
||||||
|
// TODO
|
||||||
|
// TODO
|
||||||
|
if ( needsClipping && ! sawSymbolic && ! clippedAlleles.get(0).isNull() ) stop++;
|
||||||
|
// TODO
|
||||||
|
// TODO
|
||||||
|
// TODO COMPLETELY BROKEN CODE -- THE GATK CURRENTLY ENCODES THE STOP POSITION FOR CLIPPED ALLELES AS + 1
|
||||||
|
// TODO ITS TRUE SIZE TO DIFFERENTIATE CLIPPED VS. UNCLIPPED ALLELES. NEEDS TO BE FIXED
|
||||||
|
// TODO
|
||||||
|
// TODO
|
||||||
|
|
||||||
|
final Byte refBaseForIndel = firstBaseIsClipped ? firstRefBase : null;
|
||||||
|
return new ClippedAlleles(stop, clippedAlleles, refBaseForIndel);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the alleles in inputVC should have reference bases added for padding
|
||||||
|
*
|
||||||
|
* We need to pad a VC with a common base if the length of the reference allele is
|
||||||
|
* less than the length of the VariantContext. This happens because the position of
|
||||||
|
* e.g. an indel is always one before the actual event (as per VCF convention).
|
||||||
|
*
|
||||||
|
* @param inputVC the VC to evaluate, cannot be null
|
||||||
|
* @return true if
|
||||||
|
*/
|
||||||
|
public static boolean needsPadding(final VariantContext inputVC) {
|
||||||
|
// biallelic sites with only symbolic never need padding
|
||||||
|
if ( inputVC.isBiallelic() && inputVC.getAlternateAllele(0).isSymbolic() )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
|
||||||
|
final int referenceLength = inputVC.getReference().length();
|
||||||
|
|
||||||
|
if ( referenceLength == recordLength )
|
||||||
|
return false;
|
||||||
|
else if ( referenceLength == recordLength - 1 )
|
||||||
|
return true;
|
||||||
|
else if ( !inputVC.hasSymbolicAlleles() )
|
||||||
|
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
||||||
|
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
|
||||||
|
else if ( inputVC.isMixed() && inputVC.hasSymbolicAlleles() )
|
||||||
|
throw new IllegalArgumentException("GATK infrastructure limitation prevents needsPadding from working properly with VariantContexts containing a mixture of symbolic and concrete alleles at " + inputVC);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Allele padAllele(final VariantContext vc, final Allele allele) {
|
||||||
|
assert needsPadding(vc);
|
||||||
|
|
||||||
|
if ( allele.isSymbolic() )
|
||||||
|
return allele;
|
||||||
|
else {
|
||||||
|
// get bases for current allele and create a new one with trimmed bases
|
||||||
|
final StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
||||||
|
sb.append(allele.getDisplayString());
|
||||||
|
final String newBases = sb.toString();
|
||||||
|
return Allele.create(newBases, allele.isReference());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
|
||||||
|
final boolean padVC = needsPadding(inputVC);
|
||||||
|
|
||||||
|
// nothing to do if we don't need to pad bases
|
||||||
|
if ( padVC ) {
|
||||||
|
if ( !inputVC.hasReferenceBaseForIndel() )
|
||||||
|
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
|
||||||
|
|
||||||
|
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
|
||||||
|
final Map<Allele, Allele> unpaddedToPadded = inputVC.hasGenotypes() ? new HashMap<Allele, Allele>(inputVC.getNAlleles()) : null;
|
||||||
|
|
||||||
|
boolean paddedAtLeastOne = false;
|
||||||
|
for (final Allele a : inputVC.getAlleles()) {
|
||||||
|
final Allele padded = padAllele(inputVC, a);
|
||||||
|
paddedAtLeastOne = paddedAtLeastOne || padded != a;
|
||||||
|
alleles.add(padded);
|
||||||
|
if ( unpaddedToPadded != null ) unpaddedToPadded.put(a, padded); // conditional to avoid making unnecessary make
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! paddedAtLeastOne )
|
||||||
|
throw new ReviewedStingException("VC was supposed to need padding but no allele was actually changed at location " + inputVC.getChr() + ":" + inputVC.getStart() + " with allele " + inputVC.getAlleles());
|
||||||
|
|
||||||
|
final VariantContextBuilder vcb = new VariantContextBuilder(inputVC);
|
||||||
|
vcb.alleles(alleles);
|
||||||
|
|
||||||
|
// the position of the inputVC is one further, if it doesn't contain symbolic alleles
|
||||||
|
vcb.computeEndFromAlleles(alleles, inputVC.getStart(), inputVC.getEnd());
|
||||||
|
|
||||||
|
if ( inputVC.hasGenotypes() ) {
|
||||||
|
assert unpaddedToPadded != null;
|
||||||
|
|
||||||
|
// now we can recreate new genotypes with trimmed alleles
|
||||||
|
final GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
|
||||||
|
for (final Genotype g : inputVC.getGenotypes() ) {
|
||||||
|
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
|
||||||
|
for (final Allele a : g.getAlleles()) {
|
||||||
|
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
|
||||||
|
}
|
||||||
|
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
|
||||||
|
}
|
||||||
|
vcb.genotypes(genotypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
return vcb.make();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return inputVC;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
|
||||||
|
// see if we need to trim common reference base from all alleles
|
||||||
|
|
||||||
|
final int trimExtent = computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true);
|
||||||
|
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
||||||
|
return inputVC;
|
||||||
|
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
final GenotypesContext genotypes = GenotypesContext.create();
|
||||||
|
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
||||||
|
|
||||||
|
for (final Allele a : inputVC.getAlleles()) {
|
||||||
|
if (a.isSymbolic()) {
|
||||||
|
alleles.add(a);
|
||||||
|
originalToTrimmedAlleleMap.put(a, a);
|
||||||
|
} else {
|
||||||
|
// get bases for current allele and create a new one with trimmed bases
|
||||||
|
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
||||||
|
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
||||||
|
alleles.add(trimmedAllele);
|
||||||
|
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// now we can recreate new genotypes with trimmed alleles
|
||||||
|
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
||||||
|
final List<Allele> originalAlleles = genotype.getAlleles();
|
||||||
|
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
||||||
|
for ( final Allele a : originalAlleles ) {
|
||||||
|
if ( a.isCalled() )
|
||||||
|
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
||||||
|
else
|
||||||
|
trimmedAlleles.add(Allele.NO_CALL);
|
||||||
|
}
|
||||||
|
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
||||||
|
}
|
||||||
|
|
||||||
|
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Invariant("stop != -1 || error != null") // we're either an error or a meaningful result but not both
|
||||||
|
public static class ClippedAlleles {
|
||||||
|
private final int stop;
|
||||||
|
private final List<Allele> clippedAlleles;
|
||||||
|
private final Byte refBaseForIndel;
|
||||||
|
private final String error;
|
||||||
|
|
||||||
|
@Requires({"stop > 0", "clippedAlleles != null"})
|
||||||
|
private ClippedAlleles(final int stop, final List<Allele> clippedAlleles, final Byte refBaseForIndel) {
|
||||||
|
this.stop = stop;
|
||||||
|
this.clippedAlleles = clippedAlleles;
|
||||||
|
this.error = null;
|
||||||
|
this.refBaseForIndel = refBaseForIndel;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("error != null")
|
||||||
|
private ClippedAlleles(final String error) {
|
||||||
|
this.stop = -1;
|
||||||
|
this.clippedAlleles = null;
|
||||||
|
this.refBaseForIndel = null;
|
||||||
|
this.error = error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get an error if it occurred
|
||||||
|
* @return the error message, or null if no error occurred
|
||||||
|
*/
|
||||||
|
public String getError() {
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the stop position to use after the clipping as been applied, given the
|
||||||
|
* provided position to clipAlleles
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public int getStop() {
|
||||||
|
return stop;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the clipped alleles themselves
|
||||||
|
* @return the clipped alleles in the order of the input unclipped alleles
|
||||||
|
*/
|
||||||
|
public List<Allele> getClippedAlleles() {
|
||||||
|
return clippedAlleles;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the reference base we should use for indels, or null if none is appropriate
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Byte getRefBaseForIndel() {
|
||||||
|
return refBaseForIndel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -183,7 +183,7 @@ public class VCFStandardHeaderLines {
|
||||||
registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
|
registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
|
||||||
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||||
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
|
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
|
||||||
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype-level filter"));
|
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter"));
|
||||||
|
|
||||||
// INFO lines
|
// INFO lines
|
||||||
registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
registerStandard(new VCFInfoHeaderLine(VCFConstants.END_KEY, 1, VCFHeaderLineType.Integer, "Stop position of the interval"));
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,8 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
@ -32,6 +34,7 @@ import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
|
||||||
|
|
@ -111,8 +111,9 @@ public final class FastGenotype extends Genotype {
|
||||||
final int DP,
|
final int DP,
|
||||||
final int[] AD,
|
final int[] AD,
|
||||||
final int[] PL,
|
final int[] PL,
|
||||||
|
final String filters,
|
||||||
final Map<String, Object> extendedAttributes) {
|
final Map<String, Object> extendedAttributes) {
|
||||||
super(sampleName);
|
super(sampleName, filters);
|
||||||
this.alleles = alleles;
|
this.alleles = alleles;
|
||||||
this.isPhased = isPhased;
|
this.isPhased = isPhased;
|
||||||
this.GQ = GQ;
|
this.GQ = GQ;
|
||||||
|
|
@ -152,10 +153,6 @@ public final class FastGenotype extends Genotype {
|
||||||
return GQ;
|
return GQ;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override public List<String> getFilters() {
|
|
||||||
return (List<String>) getExtendedAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override public int[] getPL() {
|
@Override public int[] getPL() {
|
||||||
return PL;
|
return PL;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ public abstract class Genotype implements Comparable<Genotype> {
|
||||||
* extended attributes map
|
* extended attributes map
|
||||||
*/
|
*/
|
||||||
public final static Collection<String> PRIMARY_KEYS = Arrays.asList(
|
public final static Collection<String> PRIMARY_KEYS = Arrays.asList(
|
||||||
|
VCFConstants.GENOTYPE_FILTER_KEY,
|
||||||
VCFConstants.GENOTYPE_KEY,
|
VCFConstants.GENOTYPE_KEY,
|
||||||
VCFConstants.GENOTYPE_QUALITY_KEY,
|
VCFConstants.GENOTYPE_QUALITY_KEY,
|
||||||
VCFConstants.DEPTH_KEY,
|
VCFConstants.DEPTH_KEY,
|
||||||
|
|
@ -38,14 +39,11 @@ public abstract class Genotype implements Comparable<Genotype> {
|
||||||
|
|
||||||
private final String sampleName;
|
private final String sampleName;
|
||||||
private GenotypeType type = null;
|
private GenotypeType type = null;
|
||||||
|
private final String filters;
|
||||||
|
|
||||||
protected Genotype(final String sampleName) {
|
protected Genotype(final String sampleName, final String filters) {
|
||||||
this.sampleName = sampleName;
|
this.sampleName = sampleName;
|
||||||
}
|
this.filters = filters;
|
||||||
|
|
||||||
protected Genotype(final String sampleName, final GenotypeType type) {
|
|
||||||
this.sampleName = sampleName;
|
|
||||||
this.type = type;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -348,13 +346,14 @@ public abstract class Genotype implements Comparable<Genotype> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("[%s %s%s%s%s%s%s]",
|
return String.format("[%s %s%s%s%s%s%s%s]",
|
||||||
getSampleName(),
|
getSampleName(),
|
||||||
getGenotypeString(false),
|
getGenotypeString(false),
|
||||||
toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, getGQ()),
|
toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, getGQ()),
|
||||||
toStringIfExists(VCFConstants.DEPTH_KEY, getDP()),
|
toStringIfExists(VCFConstants.DEPTH_KEY, getDP()),
|
||||||
toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, getAD()),
|
toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, getAD()),
|
||||||
toStringIfExists(VCFConstants.GENOTYPE_PL_KEY, getPL()),
|
toStringIfExists(VCFConstants.GENOTYPE_PL_KEY, getPL()),
|
||||||
|
toStringIfExists(VCFConstants.GENOTYPE_FILTER_KEY, getFilters()),
|
||||||
sortedString(getExtendedAttributes()));
|
sortedString(getExtendedAttributes()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -448,15 +447,25 @@ public abstract class Genotype implements Comparable<Genotype> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Returns the filter string associated with this Genotype.
|
||||||
*
|
*
|
||||||
* @return
|
* @return If this result == null, then the genotype is considered PASSing filters
|
||||||
|
* If the result != null, then the genotype has failed filtering for the reason(s)
|
||||||
|
* specified in result. To be reference compliant multiple filter field
|
||||||
|
* string values can be encoded with a ; separator.
|
||||||
*/
|
*/
|
||||||
@Ensures({"result != null"})
|
public final String getFilters() {
|
||||||
public abstract List<String> getFilters();
|
return filters;
|
||||||
|
}
|
||||||
|
|
||||||
@Ensures({"result != getFilters().isEmpty()"})
|
/**
|
||||||
public boolean isFiltered() {
|
* Is this genotype filtered or not?
|
||||||
return ! getFilters().isEmpty();
|
*
|
||||||
|
* @return returns false if getFilters() == null
|
||||||
|
*/
|
||||||
|
@Ensures({"result != (getFilters() == null)"})
|
||||||
|
public final boolean isFiltered() {
|
||||||
|
return getFilters() != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Deprecated public boolean hasLog10PError() { return hasGQ(); }
|
@Deprecated public boolean hasLog10PError() { return hasGQ(); }
|
||||||
|
|
@ -569,6 +578,16 @@ public abstract class Genotype implements Comparable<Genotype> {
|
||||||
return v == -1 ? "" : " " + name + " " + v;
|
return v == -1 ? "" : " " + name + " " + v;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a display name for field name with String value v if this isn't null. Otherwise returns ""
|
||||||
|
* @param name of the field ("FT")
|
||||||
|
* @param v the value of the field, or null if missing
|
||||||
|
* @return a non-null string for display if the field is not missing
|
||||||
|
*/
|
||||||
|
protected final static String toStringIfExists(final String name, final String v) {
|
||||||
|
return v == null ? "" : " " + name + " " + v;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a display name for field name with values vs if this isn't null. Otherwise returns ""
|
* Returns a display name for field name with values vs if this isn't null. Otherwise returns ""
|
||||||
* @param name of the field ("AD")
|
* @param name of the field ("AD")
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.variantcontext;
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Invariant;
|
import com.google.java.contract.Invariant;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -63,6 +64,7 @@ public final class GenotypeBuilder {
|
||||||
private int[] AD = null;
|
private int[] AD = null;
|
||||||
private int[] PL = null;
|
private int[] PL = null;
|
||||||
private Map<String, Object> extendedAttributes = null;
|
private Map<String, Object> extendedAttributes = null;
|
||||||
|
private String filters = null;
|
||||||
private int initialAttributeMapSize = 5;
|
private int initialAttributeMapSize = 5;
|
||||||
|
|
||||||
private boolean useFast = MAKE_FAST_BY_DEFAULT;
|
private boolean useFast = MAKE_FAST_BY_DEFAULT;
|
||||||
|
|
@ -147,6 +149,7 @@ public final class GenotypeBuilder {
|
||||||
DP(g.getDP());
|
DP(g.getDP());
|
||||||
AD(g.getAD());
|
AD(g.getAD());
|
||||||
PL(g.getPL());
|
PL(g.getPL());
|
||||||
|
filter(g.getFilters());
|
||||||
attributes(g.getExtendedAttributes());
|
attributes(g.getExtendedAttributes());
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
@ -164,6 +167,7 @@ public final class GenotypeBuilder {
|
||||||
DP = -1;
|
DP = -1;
|
||||||
AD = null;
|
AD = null;
|
||||||
PL = null;
|
PL = null;
|
||||||
|
filters = null;
|
||||||
extendedAttributes = null;
|
extendedAttributes = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -180,21 +184,11 @@ public final class GenotypeBuilder {
|
||||||
public Genotype make() {
|
public Genotype make() {
|
||||||
if ( useFast ) {
|
if ( useFast ) {
|
||||||
final Map<String, Object> ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes;
|
final Map<String, Object> ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes;
|
||||||
return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, ea);
|
return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, filters, ea);
|
||||||
} else {
|
} else {
|
||||||
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
|
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
|
||||||
if ( extendedAttributes != null ) attributes.putAll(extendedAttributes);
|
if ( extendedAttributes != null ) attributes.putAll(extendedAttributes);
|
||||||
final double log10PError = GQ == -1 ? SlowGenotype.NO_LOG10_PERROR : (GQ == 0 ? 0 : GQ / -10.0);
|
final double log10PError = GQ == -1 ? SlowGenotype.NO_LOG10_PERROR : (GQ == 0 ? 0 : GQ / -10.0);
|
||||||
|
|
||||||
Set<String> filters = null;
|
|
||||||
if ( extendedAttributes != null && extendedAttributes.containsKey(VCFConstants.GENOTYPE_FILTER_KEY) )
|
|
||||||
{
|
|
||||||
final Object f = extendedAttributes.get(VCFConstants.GENOTYPE_FILTER_KEY);
|
|
||||||
if ( f != null )
|
|
||||||
filters = new LinkedHashSet<String>((List<String>)f);
|
|
||||||
attributes.remove(VCFConstants.GENOTYPE_FILTER_KEY);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( DP != -1 ) attributes.put(VCFConstants.DEPTH_KEY, DP);
|
if ( DP != -1 ) attributes.put(VCFConstants.DEPTH_KEY, DP);
|
||||||
if ( AD != null ) attributes.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, AD);
|
if ( AD != null ) attributes.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, AD);
|
||||||
final double[] log10likelihoods = PL != null ? GenotypeLikelihoods.fromPLs(PL).getAsVector() : null;
|
final double[] log10likelihoods = PL != null ? GenotypeLikelihoods.fromPLs(PL).getAsVector() : null;
|
||||||
|
|
@ -383,9 +377,12 @@ public final class GenotypeBuilder {
|
||||||
*/
|
*/
|
||||||
@Requires("filters != null")
|
@Requires("filters != null")
|
||||||
public GenotypeBuilder filters(final List<String> filters) {
|
public GenotypeBuilder filters(final List<String> filters) {
|
||||||
if ( ! filters.isEmpty() )
|
if ( filters.isEmpty() )
|
||||||
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
|
return filter(null);
|
||||||
return this;
|
else if ( filters.size() == 1 )
|
||||||
|
return filter(filters.get(0));
|
||||||
|
else
|
||||||
|
return filter(ParsingUtils.join(";", ParsingUtils.sortList(filters)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -398,15 +395,24 @@ public final class GenotypeBuilder {
|
||||||
return filters(Arrays.asList(filters));
|
return filters(Arrays.asList(filters));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Most efficient version of setting filters -- just set the filters string to filters
|
||||||
|
*
|
||||||
|
* @param filter if filters == null or filters.equals("PASS") => genotype is PASS
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public GenotypeBuilder filter(final String filter) {
|
||||||
|
this.filters = VCFConstants.PASSES_FILTERS_v4.equals(filter) ? null : filter;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This genotype is unfiltered
|
* This genotype is unfiltered
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public GenotypeBuilder unfiltered() {
|
public GenotypeBuilder unfiltered() {
|
||||||
if ( extendedAttributes != null )
|
return filter(null);
|
||||||
extendedAttributes.remove(VCFConstants.GENOTYPE_FILTER_KEY);
|
|
||||||
return this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -42,14 +42,20 @@ public class SlowGenotype extends Genotype {
|
||||||
protected List<Allele> alleles = null;
|
protected List<Allele> alleles = null;
|
||||||
protected boolean isPhased = false;
|
protected boolean isPhased = false;
|
||||||
|
|
||||||
protected SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased, double[] log10Likelihoods) {
|
protected SlowGenotype(final String sampleName,
|
||||||
super(sampleName);
|
final List<Allele> alleles,
|
||||||
|
final double log10PError,
|
||||||
|
final String filters,
|
||||||
|
final Map<String, Object> attributes,
|
||||||
|
final boolean isPhased,
|
||||||
|
final double[] log10Likelihoods) {
|
||||||
|
super(sampleName, filters);
|
||||||
|
|
||||||
if ( alleles == null || alleles.isEmpty() )
|
if ( alleles == null || alleles.isEmpty() )
|
||||||
this.alleles = Collections.emptyList();
|
this.alleles = Collections.emptyList();
|
||||||
else
|
else
|
||||||
this.alleles = Collections.unmodifiableList(alleles);
|
this.alleles = Collections.unmodifiableList(alleles);
|
||||||
commonInfo = new CommonInfo(sampleName, log10PError, filters, attributes);
|
commonInfo = new CommonInfo(sampleName, log10PError, Collections.<String>emptySet(), attributes);
|
||||||
if ( log10Likelihoods != null )
|
if ( log10Likelihoods != null )
|
||||||
commonInfo.putAttribute(VCFConstants.GENOTYPE_PL_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
commonInfo.putAttribute(VCFConstants.GENOTYPE_PL_KEY, GenotypeLikelihoods.fromLog10Likelihoods(log10Likelihoods));
|
||||||
this.isPhased = isPhased;
|
this.isPhased = isPhased;
|
||||||
|
|
@ -112,7 +118,6 @@ public class SlowGenotype extends Genotype {
|
||||||
// get routines to access context info fields
|
// get routines to access context info fields
|
||||||
//
|
//
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
@Override public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
|
|
||||||
@Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
|
@Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
|
||||||
@Override public double getLog10PError() { return commonInfo.getLog10PError(); }
|
@Override public double getLog10PError() { return commonInfo.getLog10PError(); }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -558,8 +558,8 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getAlleleStringWithRefPadding(final Allele allele) {
|
public String getAlleleStringWithRefPadding(final Allele allele) {
|
||||||
if ( VariantContextUtils.needsPadding(this) )
|
if ( VCFAlleleClipper.needsPadding(this) )
|
||||||
return VariantContextUtils.padAllele(this, allele).getDisplayString();
|
return VCFAlleleClipper.padAllele(this, allele).getDisplayString();
|
||||||
else
|
else
|
||||||
return allele.getDisplayString();
|
return allele.getDisplayString();
|
||||||
}
|
}
|
||||||
|
|
@ -1126,6 +1126,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
private boolean validate(final EnumSet<Validation> validationToPerform) {
|
private boolean validate(final EnumSet<Validation> validationToPerform) {
|
||||||
|
validateStop();
|
||||||
for (final Validation val : validationToPerform ) {
|
for (final Validation val : validationToPerform ) {
|
||||||
switch (val) {
|
switch (val) {
|
||||||
case ALLELES: validateAlleles(); break;
|
case ALLELES: validateAlleles(); break;
|
||||||
|
|
@ -1138,6 +1139,20 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check that getEnd() == END from the info field, if it's present
|
||||||
|
*/
|
||||||
|
private void validateStop() {
|
||||||
|
if ( hasAttribute(VCFConstants.END_KEY) ) {
|
||||||
|
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
|
||||||
|
assert end != -1;
|
||||||
|
if ( end != getEnd() )
|
||||||
|
throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":"
|
||||||
|
+ getStart() + "; getEnd() was " + getEnd()
|
||||||
|
+ " but this VariantContext contains an END key with value " + end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void validateReferencePadding() {
|
private void validateReferencePadding() {
|
||||||
if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
|
if ( hasSymbolicAlleles() ) // symbolic alleles don't need padding...
|
||||||
return;
|
return;
|
||||||
|
|
@ -1176,7 +1191,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
// if ( getType() == Type.INDEL ) {
|
// if ( getType() == Type.INDEL ) {
|
||||||
// if ( getReference().length() != (getLocation().size()-1) ) {
|
// if ( getReference().length() != (getLocation().size()-1) ) {
|
||||||
long length = (stop - start) + 1;
|
long length = (stop - start) + 1;
|
||||||
if ( ! isSymbolic()
|
if ( ! hasSymbolicAlleles()
|
||||||
&& ((getReference().isNull() && length != 1 )
|
&& ((getReference().isNull() && length != 1 )
|
||||||
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
|
|| (getReference().isNonNull() && (length - getReference().length() > 1)))) {
|
||||||
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
|
throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this);
|
||||||
|
|
@ -1477,7 +1492,11 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasSymbolicAlleles() {
|
public boolean hasSymbolicAlleles() {
|
||||||
for (final Allele a: getAlleles()) {
|
return hasSymbolicAlleles(getAlleles());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean hasSymbolicAlleles( final List<Allele> alleles ) {
|
||||||
|
for ( final Allele a: alleles ) {
|
||||||
if (a.isSymbolic()) {
|
if (a.isSymbolic()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -454,6 +454,32 @@ public class VariantContextBuilder {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see #computeEndFromAlleles(java.util.List, int, int) with endForSymbolicAlleles == -1
|
||||||
|
*/
|
||||||
|
public VariantContextBuilder computeEndFromAlleles(final List<Allele> alleles, final int start) {
|
||||||
|
return computeEndFromAlleles(alleles, start, -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the end position for this VariantContext from the alleles themselves
|
||||||
|
*
|
||||||
|
* @see VariantContextUtils.computeEndFromAlleles()
|
||||||
|
*
|
||||||
|
* assigns this builder the stop position computed.
|
||||||
|
*
|
||||||
|
* @param alleles the list of alleles to consider. The reference allele must be the first one
|
||||||
|
* @param start the known start position of this event
|
||||||
|
* @param endForSymbolicAlleles the end position to use if any of the alleles is symbolic. Can be -1
|
||||||
|
* if no is expected but will throw an error if one is found
|
||||||
|
* @return this builder
|
||||||
|
*/
|
||||||
|
@Requires({"! alleles.isEmpty()", "start > 0", "endForSymbolicAlleles == -1 || endForSymbolicAlleles > 0" })
|
||||||
|
public VariantContextBuilder computeEndFromAlleles(final List<Allele> alleles, final int start, final int endForSymbolicAlleles) {
|
||||||
|
stop(VariantContextUtils.computeEndFromAlleles(alleles, start, endForSymbolicAlleles));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return true if this builder contains fully decoded data
|
* @return true if this builder contains fully decoded data
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,9 @@ public class VariantContextUtils {
|
||||||
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||||
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
public final static String MERGE_REF_IN_ALL = "ReferenceInAll";
|
||||||
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
public final static String MERGE_FILTER_PREFIX = "filterIn";
|
||||||
|
|
||||||
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
private static final List<Allele> DIPLOID_NO_CALL = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||||
|
private static Set<String> MISSING_KEYS_WARNED_ABOUT = new HashSet<String>();
|
||||||
|
|
||||||
final public static JexlEngine engine = new JexlEngine();
|
final public static JexlEngine engine = new JexlEngine();
|
||||||
public static final int DEFAULT_PLOIDY = 2;
|
public static final int DEFAULT_PLOIDY = 2;
|
||||||
|
|
@ -145,6 +147,10 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
|
attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts.size() == 1 ? alleleCounts.get(0) : alleleCounts);
|
||||||
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
|
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs.size() == 1 ? alleleFreqs.get(0) : alleleFreqs);
|
||||||
|
} else {
|
||||||
|
// if there's no alt AC and AF shouldn't be present
|
||||||
|
attributes.remove(VCFConstants.ALLELE_COUNT_KEY);
|
||||||
|
attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -184,83 +190,6 @@ public class VariantContextUtils {
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns true if the alleles in inputVC should have reference bases added for padding
|
|
||||||
*
|
|
||||||
* We need to pad a VC with a common base if the length of the reference allele is
|
|
||||||
* less than the length of the VariantContext. This happens because the position of
|
|
||||||
* e.g. an indel is always one before the actual event (as per VCF convention).
|
|
||||||
*
|
|
||||||
* @param inputVC the VC to evaluate, cannot be null
|
|
||||||
* @return true if
|
|
||||||
*/
|
|
||||||
public static boolean needsPadding(final VariantContext inputVC) {
|
|
||||||
final int recordLength = inputVC.getEnd() - inputVC.getStart() + 1;
|
|
||||||
final int referenceLength = inputVC.getReference().length();
|
|
||||||
|
|
||||||
if ( referenceLength == recordLength )
|
|
||||||
return false;
|
|
||||||
else if ( referenceLength == recordLength - 1 )
|
|
||||||
return true;
|
|
||||||
else if ( !inputVC.hasSymbolicAlleles() )
|
|
||||||
throw new IllegalArgumentException("Badly formed variant context at location " + String.valueOf(inputVC.getStart()) +
|
|
||||||
" in contig " + inputVC.getChr() + ". Reference length must be at most one base shorter than location size");
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Allele padAllele(final VariantContext vc, final Allele allele) {
|
|
||||||
assert needsPadding(vc);
|
|
||||||
|
|
||||||
if ( allele.isSymbolic() )
|
|
||||||
return allele;
|
|
||||||
else {
|
|
||||||
// get bases for current allele and create a new one with trimmed bases
|
|
||||||
final StringBuilder sb = new StringBuilder();
|
|
||||||
sb.append((char)vc.getReferenceBaseForIndel().byteValue());
|
|
||||||
sb.append(allele.getDisplayString());
|
|
||||||
final String newBases = sb.toString();
|
|
||||||
return Allele.create(newBases, allele.isReference());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public static VariantContext createVariantContextWithPaddedAlleles(VariantContext inputVC) {
|
|
||||||
final boolean padVC = needsPadding(inputVC);
|
|
||||||
|
|
||||||
// nothing to do if we don't need to pad bases
|
|
||||||
if ( padVC ) {
|
|
||||||
if ( !inputVC.hasReferenceBaseForIndel() )
|
|
||||||
throw new ReviewedStingException("Badly formed variant context at location " + inputVC.getChr() + ":" + inputVC.getStart() + "; no padded reference base is available.");
|
|
||||||
|
|
||||||
final ArrayList<Allele> alleles = new ArrayList<Allele>(inputVC.getNAlleles());
|
|
||||||
final Map<Allele, Allele> unpaddedToPadded = new HashMap<Allele, Allele>(inputVC.getNAlleles());
|
|
||||||
|
|
||||||
for (final Allele a : inputVC.getAlleles()) {
|
|
||||||
final Allele padded = padAllele(inputVC, a);
|
|
||||||
alleles.add(padded);
|
|
||||||
unpaddedToPadded.put(a, padded);
|
|
||||||
}
|
|
||||||
|
|
||||||
// now we can recreate new genotypes with trimmed alleles
|
|
||||||
GenotypesContext genotypes = GenotypesContext.create(inputVC.getNSamples());
|
|
||||||
for (final Genotype g : inputVC.getGenotypes() ) {
|
|
||||||
final List<Allele> newGenotypeAlleles = new ArrayList<Allele>(g.getAlleles().size());
|
|
||||||
for (final Allele a : g.getAlleles()) {
|
|
||||||
newGenotypeAlleles.add( a.isCalled() ? unpaddedToPadded.get(a) : Allele.NO_CALL);
|
|
||||||
}
|
|
||||||
genotypes.add(new GenotypeBuilder(g).alleles(newGenotypeAlleles).make());
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
return new VariantContextBuilder(inputVC).alleles(alleles).genotypes(genotypes).make();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return inputVC;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Set<String> MISSING_KEYS_WARNED_ABOUT = new HashSet<String>();
|
|
||||||
public final static VCFCompoundHeaderLine getMetaDataForField(final VCFHeader header, final String field) {
|
public final static VCFCompoundHeaderLine getMetaDataForField(final VCFHeader header, final String field) {
|
||||||
VCFCompoundHeaderLine metaData = header.getFormatHeaderLine(field);
|
VCFCompoundHeaderLine metaData = header.getFormatHeaderLine(field);
|
||||||
if ( metaData == null ) metaData = header.getInfoHeaderLine(field);
|
if ( metaData == null ) metaData = header.getInfoHeaderLine(field);
|
||||||
|
|
@ -564,7 +493,7 @@ public class VariantContextUtils {
|
||||||
for (final VariantContext vc : prepaddedVCs) {
|
for (final VariantContext vc : prepaddedVCs) {
|
||||||
// also a reasonable place to remove filtered calls, if needed
|
// also a reasonable place to remove filtered calls, if needed
|
||||||
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
if ( ! filteredAreUncalled || vc.isNotFiltered() )
|
||||||
VCs.add(createVariantContextWithPaddedAlleles(vc));
|
VCs.add(VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc));
|
||||||
}
|
}
|
||||||
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
if ( VCs.size() == 0 ) // everything is filtered out and we're filteredAreUncalled
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -769,7 +698,7 @@ public class VariantContextUtils {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
|
private static VariantContext createVariantContextWithTrimmedAlleles(VariantContext inputVC) {
|
||||||
// see if we need to trim common reference base from all alleles
|
// see if we need to trim common reference base from all alleles
|
||||||
boolean trimVC;
|
boolean trimVC;
|
||||||
|
|
||||||
|
|
@ -780,7 +709,7 @@ public class VariantContextUtils {
|
||||||
else if (refAllele.isNull())
|
else if (refAllele.isNull())
|
||||||
trimVC = false;
|
trimVC = false;
|
||||||
else {
|
else {
|
||||||
trimVC = (AbstractVCFCodec.computeForwardClipping(inputVC.getAlternateAlleles(), (byte)inputVC.getReference().getDisplayString().charAt(0)) > 0);
|
trimVC = VCFAlleleClipper.shouldClipFirstBaseP(inputVC.getAlternateAlleles(), (byte) inputVC.getReference().getDisplayString().charAt(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
// nothing to do if we don't need to trim bases
|
// nothing to do if we don't need to trim bases
|
||||||
|
|
@ -836,46 +765,6 @@ public class VariantContextUtils {
|
||||||
return inputVC;
|
return inputVC;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext reverseTrimAlleles( final VariantContext inputVC ) {
|
|
||||||
// see if we need to trim common reference base from all alleles
|
|
||||||
|
|
||||||
final int trimExtent = AbstractVCFCodec.computeReverseClipping(inputVC.getAlleles(), inputVC.getReference().getDisplayString().getBytes(), 0, true, -1);
|
|
||||||
if ( trimExtent <= 0 || inputVC.getAlleles().size() <= 1 )
|
|
||||||
return inputVC;
|
|
||||||
|
|
||||||
final List<Allele> alleles = new ArrayList<Allele>();
|
|
||||||
final GenotypesContext genotypes = GenotypesContext.create();
|
|
||||||
final Map<Allele, Allele> originalToTrimmedAlleleMap = new HashMap<Allele, Allele>();
|
|
||||||
|
|
||||||
for (final Allele a : inputVC.getAlleles()) {
|
|
||||||
if (a.isSymbolic()) {
|
|
||||||
alleles.add(a);
|
|
||||||
originalToTrimmedAlleleMap.put(a, a);
|
|
||||||
} else {
|
|
||||||
// get bases for current allele and create a new one with trimmed bases
|
|
||||||
final byte[] newBases = Arrays.copyOfRange(a.getBases(), 0, a.length()-trimExtent);
|
|
||||||
final Allele trimmedAllele = Allele.create(newBases, a.isReference());
|
|
||||||
alleles.add(trimmedAllele);
|
|
||||||
originalToTrimmedAlleleMap.put(a, trimmedAllele);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// now we can recreate new genotypes with trimmed alleles
|
|
||||||
for ( final Genotype genotype : inputVC.getGenotypes() ) {
|
|
||||||
final List<Allele> originalAlleles = genotype.getAlleles();
|
|
||||||
final List<Allele> trimmedAlleles = new ArrayList<Allele>();
|
|
||||||
for ( final Allele a : originalAlleles ) {
|
|
||||||
if ( a.isCalled() )
|
|
||||||
trimmedAlleles.add(originalToTrimmedAlleleMap.get(a));
|
|
||||||
else
|
|
||||||
trimmedAlleles.add(Allele.NO_CALL);
|
|
||||||
}
|
|
||||||
genotypes.add(new GenotypeBuilder(genotype).alleles(trimmedAlleles).make());
|
|
||||||
}
|
|
||||||
|
|
||||||
return new VariantContextBuilder(inputVC).stop(inputVC.getStart() + alleles.get(0).length() + (inputVC.isMixed() ? -1 : 0)).alleles(alleles).genotypes(genotypes).make();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static GenotypesContext stripPLs(GenotypesContext genotypes) {
|
public static GenotypesContext stripPLs(GenotypesContext genotypes) {
|
||||||
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
GenotypesContext newGs = GenotypesContext.create(genotypes.size());
|
||||||
|
|
||||||
|
|
@ -1485,4 +1374,33 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
return true; // we passed all tests, we matched
|
return true; // we passed all tests, we matched
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the end position for this VariantContext from the alleles themselves
|
||||||
|
*
|
||||||
|
* In the trivial case this is a single BP event and end = start (open intervals)
|
||||||
|
* In general the end is start + ref length - 1, handling the case where ref length == 0
|
||||||
|
* However, if alleles contains a symbolic allele then we use endForSymbolicAllele in all cases
|
||||||
|
*
|
||||||
|
* @param alleles the list of alleles to consider. The reference allele must be the first one
|
||||||
|
* @param start the known start position of this event
|
||||||
|
* @param endForSymbolicAlleles the end position to use if any of the alleles is symbolic. Can be -1
|
||||||
|
* if no is expected but will throw an error if one is found
|
||||||
|
* @return this builder
|
||||||
|
*/
|
||||||
|
@Requires({"! alleles.isEmpty()", "start > 0", "endForSymbolicAlleles == -1 || endForSymbolicAlleles > 0" })
|
||||||
|
public static int computeEndFromAlleles(final List<Allele> alleles, final int start, final int endForSymbolicAlleles) {
|
||||||
|
final Allele ref = alleles.get(0);
|
||||||
|
|
||||||
|
if ( ref.isNonReference() )
|
||||||
|
throw new ReviewedStingException("computeEndFromAlleles requires first allele to be reference");
|
||||||
|
|
||||||
|
if ( VariantContext.hasSymbolicAlleles(alleles) ) {
|
||||||
|
if ( endForSymbolicAlleles == -1 )
|
||||||
|
throw new ReviewedStingException("computeEndFromAlleles found a symbolic allele but endForSymbolicAlleles was provided");
|
||||||
|
return endForSymbolicAlleles;
|
||||||
|
} else {
|
||||||
|
return start + Math.max(ref.length() - 1, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||||
|
|
@ -49,17 +50,22 @@ public abstract class BCF2FieldWriter {
|
||||||
private final VCFHeader header;
|
private final VCFHeader header;
|
||||||
private final BCF2FieldEncoder fieldEncoder;
|
private final BCF2FieldEncoder fieldEncoder;
|
||||||
|
|
||||||
|
@Requires({"header != null", "fieldEncoder != null"})
|
||||||
protected BCF2FieldWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
protected BCF2FieldWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||||
this.header = header;
|
this.header = header;
|
||||||
this.fieldEncoder = fieldEncoder;
|
this.fieldEncoder = fieldEncoder;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ensures("result != null")
|
||||||
protected VCFHeader getHeader() { return header; }
|
protected VCFHeader getHeader() { return header; }
|
||||||
|
@Ensures("result != null")
|
||||||
protected BCF2FieldEncoder getFieldEncoder() {
|
protected BCF2FieldEncoder getFieldEncoder() {
|
||||||
return fieldEncoder;
|
return fieldEncoder;
|
||||||
}
|
}
|
||||||
|
@Ensures("result != null")
|
||||||
protected String getField() { return getFieldEncoder().getField(); }
|
protected String getField() { return getFieldEncoder().getField(); }
|
||||||
|
|
||||||
|
@Requires("vc != null")
|
||||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||||
fieldEncoder.writeFieldKey(encoder);
|
fieldEncoder.writeFieldKey(encoder);
|
||||||
}
|
}
|
||||||
|
|
@ -124,6 +130,9 @@ public abstract class BCF2FieldWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@Requires({"encodingType != null",
|
||||||
|
"nValuesPerGenotype >= 0 || ! getFieldEncoder().hasConstantNumElements()"})
|
||||||
|
@Ensures("nValuesPerGenotype >= 0")
|
||||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||||
// writes the key information
|
// writes the key information
|
||||||
super.start(encoder, vc);
|
super.start(encoder, vc);
|
||||||
|
|
@ -141,15 +150,18 @@ public abstract class BCF2FieldWriter {
|
||||||
encoder.encodeType(nValuesPerGenotype, encodingType);
|
encoder.encodeType(nValuesPerGenotype, encodingType);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"encodingType != null", "nValuesPerGenotype >= 0"})
|
||||||
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||||
final Object fieldValue = g.getExtendedAttribute(getField(), null);
|
final Object fieldValue = g.getExtendedAttribute(getField(), null);
|
||||||
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
|
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ensures({"result >= 0"})
|
||||||
protected int numElements(final VariantContext vc, final Genotype g) {
|
protected int numElements(final VariantContext vc, final Genotype g) {
|
||||||
return getFieldEncoder().numElements(vc, g.getExtendedAttribute(getField()));
|
return getFieldEncoder().numElements(vc, g.getExtendedAttribute(getField()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Ensures({"result >= 0"})
|
||||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
|
private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
|
||||||
int size = -1;
|
int size = -1;
|
||||||
|
|
||||||
|
|
@ -227,6 +239,22 @@ public abstract class BCF2FieldWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class FTGenotypesWriter extends StaticallyTypeGenotypesWriter {
|
||||||
|
public FTGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||||
|
super(header, fieldEncoder);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||||
|
final String fieldValue = g.getFilters();
|
||||||
|
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected int numElements(final VariantContext vc, final Genotype g) {
|
||||||
|
return getFieldEncoder().numElements(vc, g.getFilters());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static class GTWriter extends GenotypesWriter {
|
public static class GTWriter extends GenotypesWriter {
|
||||||
final Map<Allele, Integer> alleleMapForTriPlus = new HashMap<Allele, Integer>(5);
|
final Map<Allele, Integer> alleleMapForTriPlus = new HashMap<Allele, Integer>(5);
|
||||||
Allele ref, alt1;
|
Allele ref, alt1;
|
||||||
|
|
|
||||||
|
|
@ -140,6 +140,8 @@ public class BCF2FieldWriterManager {
|
||||||
|
|
||||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||||
return new BCF2FieldWriter.GTWriter(header, fieldEncoder);
|
return new BCF2FieldWriter.GTWriter(header, fieldEncoder);
|
||||||
|
} else if ( line.getID().equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||||
|
return new BCF2FieldWriter.FTGenotypesWriter(header, fieldEncoder);
|
||||||
} else if ( intGenotypeFieldAccessors.getAccessor(field) != null ) {
|
} else if ( intGenotypeFieldAccessors.getAccessor(field) != null ) {
|
||||||
return new BCF2FieldWriter.IGFGenotypesWriter(header, fieldEncoder, intGenotypeFieldAccessors.getAccessor(field));
|
return new BCF2FieldWriter.IGFGenotypesWriter(header, fieldEncoder, intGenotypeFieldAccessors.getAccessor(field));
|
||||||
} else if ( line.getType() == VCFHeaderLineType.Integer ) {
|
} else if ( line.getType() == VCFHeaderLineType.Integer ) {
|
||||||
|
|
|
||||||
|
|
@ -261,11 +261,13 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void buildAlleles( VariantContext vc ) throws IOException {
|
private void buildAlleles( VariantContext vc ) throws IOException {
|
||||||
final boolean needsPadding = VariantContextUtils.needsPadding(vc);
|
final boolean needsPadding = VCFAlleleClipper.needsPadding(vc);
|
||||||
for ( Allele allele : vc.getAlleles() ) {
|
for ( Allele allele : vc.getAlleles() ) {
|
||||||
if ( needsPadding )
|
if ( needsPadding )
|
||||||
allele = VariantContextUtils.padAllele(vc,allele);
|
allele = VCFAlleleClipper.padAllele(vc, allele);
|
||||||
final byte[] s = allele.getDisplayBases();
|
final byte[] s = allele.getDisplayBases();
|
||||||
|
if ( s == null )
|
||||||
|
throw new ReviewedStingException("BUG: BCF2Writer encountered null padded allele" + allele);
|
||||||
encoder.encodeTypedString(s);
|
encoder.encodeTypedString(s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -162,7 +162,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
||||||
vc = new VariantContextBuilder(vc).noGenotypes().make();
|
vc = new VariantContextBuilder(vc).noGenotypes().make();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
vc = VariantContextUtils.createVariantContextWithPaddedAlleles(vc);
|
vc = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
||||||
super.add(vc);
|
super.add(vc);
|
||||||
|
|
||||||
Map<Allele, String> alleleMap = buildAlleleMap(vc);
|
Map<Allele, String> alleleMap = buildAlleleMap(vc);
|
||||||
|
|
@ -348,9 +348,8 @@ class VCFWriter extends IndexingVariantContextWriter {
|
||||||
missingSampleError(vc, mHeader);
|
missingSampleError(vc, mHeader);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
final List<String> attrs = new ArrayList<String>(genotypeFormatKeys.size());
|
||||||
for ( String field : genotypeFormatKeys ) {
|
for ( String field : genotypeFormatKeys ) {
|
||||||
|
|
||||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||||
if ( !g.isAvailable() ) {
|
if ( !g.isAvailable() ) {
|
||||||
throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record");
|
throw new ReviewedStingException("GTs cannot be missing for some samples if they are available for others in the record");
|
||||||
|
|
@ -363,54 +362,53 @@ class VCFWriter extends IndexingVariantContextWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
String outputValue;
|
|
||||||
final IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.getAccessor(field);
|
|
||||||
if ( accessor != null ) {
|
|
||||||
final int[] intValues = accessor.getValues(g);
|
|
||||||
if ( intValues == null )
|
|
||||||
outputValue = VCFConstants.MISSING_VALUE_v4;
|
|
||||||
else if ( intValues.length == 1 ) // fast path
|
|
||||||
outputValue = Integer.toString(intValues[0]);
|
|
||||||
else {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
sb.append(intValues[0]);
|
|
||||||
for ( int i = 1; i < intValues.length; i++) {
|
|
||||||
sb.append(",");
|
|
||||||
sb.append(intValues[i]);
|
|
||||||
}
|
|
||||||
outputValue = sb.toString();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
Object val = g.hasExtendedAttribute(field) ? g.getExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;
|
String outputValue;
|
||||||
|
|
||||||
// some exceptions
|
|
||||||
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) {
|
if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY ) ) {
|
||||||
val = g.isFiltered() ? ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())) : VCFConstants.PASSES_FILTERS_v4;
|
outputValue = g.isFiltered() ? g.getFilters() : VCFConstants.PASSES_FILTERS_v4;
|
||||||
}
|
} else {
|
||||||
|
final IntGenotypeFieldAccessors.Accessor accessor = intGenotypeFieldAccessors.getAccessor(field);
|
||||||
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
|
if ( accessor != null ) {
|
||||||
if ( metaData != null ) {
|
final int[] intValues = accessor.getValues(g);
|
||||||
int numInFormatField = metaData.getCount(vc);
|
if ( intValues == null )
|
||||||
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
outputValue = VCFConstants.MISSING_VALUE_v4;
|
||||||
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
else if ( intValues.length == 1 ) // fast path
|
||||||
// For example, if Number=2, the string has to be ".,."
|
outputValue = Integer.toString(intValues[0]);
|
||||||
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
|
else {
|
||||||
for ( int i = 1; i < numInFormatField; i++ ) {
|
StringBuilder sb = new StringBuilder();
|
||||||
sb.append(",");
|
sb.append(intValues[0]);
|
||||||
sb.append(VCFConstants.MISSING_VALUE_v4);
|
for ( int i = 1; i < intValues.length; i++) {
|
||||||
|
sb.append(",");
|
||||||
|
sb.append(intValues[i]);
|
||||||
|
}
|
||||||
|
outputValue = sb.toString();
|
||||||
}
|
}
|
||||||
val = sb.toString();
|
} else {
|
||||||
|
Object val = g.hasExtendedAttribute(field) ? g.getExtendedAttribute(field) : VCFConstants.MISSING_VALUE_v4;
|
||||||
|
|
||||||
|
VCFFormatHeaderLine metaData = mHeader.getFormatHeaderLine(field);
|
||||||
|
if ( metaData != null ) {
|
||||||
|
int numInFormatField = metaData.getCount(vc);
|
||||||
|
if ( numInFormatField > 1 && val.equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||||
|
// If we have a missing field but multiple values are expected, we need to construct a new string with all fields.
|
||||||
|
// For example, if Number=2, the string has to be ".,."
|
||||||
|
StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
|
||||||
|
for ( int i = 1; i < numInFormatField; i++ ) {
|
||||||
|
sb.append(",");
|
||||||
|
sb.append(VCFConstants.MISSING_VALUE_v4);
|
||||||
|
}
|
||||||
|
val = sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// assume that if key is absent, then the given string encoding suffices
|
||||||
|
outputValue = formatVCFField(val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// assume that if key is absent, then the given string encoding suffices
|
if ( outputValue != null )
|
||||||
outputValue = formatVCFField(val);
|
attrs.add(outputValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( outputValue != null )
|
|
||||||
attrs.add(outputValue);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// strip off trailing missing values
|
// strip off trailing missing values
|
||||||
|
|
@ -524,7 +522,7 @@ class VCFWriter extends IndexingVariantContextWriter {
|
||||||
if ( g.hasDP() ) sawDP = true;
|
if ( g.hasDP() ) sawDP = true;
|
||||||
if ( g.hasAD() ) sawAD = true;
|
if ( g.hasAD() ) sawAD = true;
|
||||||
if ( g.hasPL() ) sawPL = true;
|
if ( g.hasPL() ) sawPL = true;
|
||||||
if (g.isFiltered() && g.isCalled()) sawGenotypeFilter = true;
|
if (g.isFiltered()) sawGenotypeFilter = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( sawGoodQual ) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
if ( sawGoodQual ) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY);
|
||||||
|
|
|
||||||
|
|
@ -15,10 +15,7 @@ import org.testng.SkipException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
@ -296,6 +293,12 @@ public abstract class BaseTest {
|
||||||
assertEqualsDoubleSmart(actual, expected, DEFAULT_FLOAT_TOLERANCE);
|
assertEqualsDoubleSmart(actual, expected, DEFAULT_FLOAT_TOLERANCE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static final <T> void assertEqualsSet(final Set<T> actual, final Set<T> expected, final String info) {
|
||||||
|
final Set<T> actualSet = new HashSet<T>(actual);
|
||||||
|
final Set<T> expectedSet = new HashSet<T>(expected);
|
||||||
|
Assert.assertTrue(actualSet.equals(expectedSet), info); // note this is necessary due to testng bug for set comps
|
||||||
|
}
|
||||||
|
|
||||||
public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
|
public static final void assertEqualsDoubleSmart(final double actual, final double expected, final double tolerance) {
|
||||||
if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
|
if ( Double.isNaN(expected) ) // NaN == NaN => false unfortunately
|
||||||
Assert.assertTrue(Double.isNaN(actual));
|
Assert.assertTrue(Double.isNaN(actual));
|
||||||
|
|
|
||||||
|
|
@ -18,22 +18,21 @@ public class SymbolicAllelesIntegrationTest extends WalkerTest {
|
||||||
" --no_cmdline_in_header";
|
" --no_cmdline_in_header";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
@Test(enabled = false)
|
|
||||||
public void test1() {
|
public void test1() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(b36KGReference, "symbolic_alleles_1.vcf"),
|
baseTestString(b36KGReference, "symbolic_alleles_1.vcf"),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("c79137da24ad4dc15cedc742de39247f"));
|
Arrays.asList("5bafc5a99ea839e686e55de93f91fd5c"));
|
||||||
executeTest("Test symbolic alleles", spec);
|
executeTest("Test symbolic alleles", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void test2() {
|
public void test2() {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
|
baseTestString(b36KGReference, "symbolic_alleles_2.vcf"),
|
||||||
1,
|
1,
|
||||||
Arrays.asList("3f6cbbd5fdf164d87081a3af19eeeba7"));
|
Arrays.asList("bf5a09f783ab1fa44774c81f91d10921"));
|
||||||
executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec);
|
executeTest("Test symbolic alleles mixed in with non-symbolic alleles", spec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -75,7 +75,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
||||||
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
|
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
|
||||||
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
|
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
|
||||||
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
|
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
|
||||||
"-L 20:1-70000 -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",1,Arrays.asList("fbbbebfda35bab3f6f62eea2f0be1c01"));
|
"-L 20:1-70000 -o %s --no_cmdline_in_header -U LENIENT_VCF_PROCESSING",1,Arrays.asList("d8906b67c7f9fdb5b37b8e9e050982d3"));
|
||||||
spec.disableShadowBCF();
|
spec.disableShadowBCF();
|
||||||
executeTest("testBeagleChangesSitesToRef",spec);
|
executeTest("testBeagleChangesSitesToRef",spec);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -38,17 +38,17 @@ public class DiagnoseTargetsIntegrationTest extends WalkerTest {
|
||||||
private void DTTest(String testName, String args, String md5) {
|
private void DTTest(String testName, String args, String md5) {
|
||||||
String base = String.format("-T DiagnoseTargets --no_cmdline_in_header -R %s -L %s", REF, L) + " -o %s ";
|
String base = String.format("-T DiagnoseTargets --no_cmdline_in_header -R %s -L %s", REF, L) + " -o %s ";
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(base + args, Arrays.asList(md5));
|
WalkerTestSpec spec = new WalkerTestSpec(base + args, Arrays.asList(md5));
|
||||||
spec.disableShadowBCF();
|
//spec.disableShadowBCF();
|
||||||
executeTest(testName, spec);
|
executeTest(testName, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testSingleSample() {
|
public void testSingleSample() {
|
||||||
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "ef71a569a48697c89e642cdda7bfb766");
|
DTTest("testSingleSample ", "-I " + singleSample + " -max 75", "9954b21163d3e66db232938ec509067f");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testMultiSample() {
|
public void testMultiSample() {
|
||||||
DTTest("testMultiSample ", "-I " + multiSample, "1e6e15156e01e736274898fdac77d911");
|
DTTest("testMultiSample ", "-I " + multiSample, "7c5277261e8e9dd74666f04843ffb09c");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
public void testGenotypeFilters1() {
|
public void testGenotypeFilters1() {
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||||
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G_filter 'GQ == 0.60' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("c5ed9dd3975b3602293bb484b4fda5f4"));
|
Arrays.asList("060e9e7b6faf8b2f7b3291594eb6b39c"));
|
||||||
executeTest("test genotype filter #1", spec1);
|
executeTest("test genotype filter #1", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -88,7 +88,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
||||||
public void testGenotypeFilters2() {
|
public void testGenotypeFilters2() {
|
||||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||||
baseTestString() + " -G_filter 'isHomVar == 1' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
baseTestString() + " -G_filter 'isHomVar == 1' -G_filterName foo --variant " + privateTestDir + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1,
|
||||||
Arrays.asList("979ccdf484259117aa31305701075602"));
|
Arrays.asList("00f90028a8c0d56772c47f039816b585"));
|
||||||
executeTest("test genotype filter #2", spec2);
|
executeTest("test genotype filter #2", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,6 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
" -recalFile %s" +
|
" -recalFile %s" +
|
||||||
" -tranchesFile %s",
|
" -tranchesFile %s",
|
||||||
Arrays.asList(params.recalMD5, params.tranchesMD5));
|
Arrays.asList(params.recalMD5, params.tranchesMD5));
|
||||||
spec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
|
|
||||||
executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst();
|
executeTest("testVariantRecalibrator-"+params.inVCF, spec).getFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -99,7 +98,6 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
" -recalFile %s" +
|
" -recalFile %s" +
|
||||||
" -tranchesFile %s",
|
" -tranchesFile %s",
|
||||||
Arrays.asList(params.recalMD5, params.tranchesMD5));
|
Arrays.asList(params.recalMD5, params.tranchesMD5));
|
||||||
spec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
|
|
||||||
executeTest("testVariantRecalibratorIndel-"+params.inVCF, spec).getFirst();
|
executeTest("testVariantRecalibratorIndel-"+params.inVCF, spec).getFirst();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -116,7 +114,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
||||||
" -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
|
" -tranchesFile " + getMd5DB().getMD5FilePath(params.tranchesMD5, null) +
|
||||||
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
|
" -recalFile " + getMd5DB().getMD5FilePath(params.recalMD5, null),
|
||||||
Arrays.asList(params.cutVCFMD5));
|
Arrays.asList(params.cutVCFMD5));
|
||||||
spec.disableShadowBCF(); // TODO -- enable when we support symbolic alleles
|
spec.disableShadowBCF(); // has to be disabled because the input VCF is missing LowQual annotation
|
||||||
executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec);
|
executeTest("testApplyRecalibrationIndel-"+params.inVCF, spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,226 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||||
|
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
import org.testng.Assert;
|
||||||
|
import org.testng.SkipException;
|
||||||
|
import org.testng.annotations.DataProvider;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class VCFAlleleClipperUnitTest extends BaseTest {
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Test allele clipping
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class ClipAllelesTest extends TestDataProvider {
|
||||||
|
final int position;
|
||||||
|
final int stop;
|
||||||
|
final String ref;
|
||||||
|
List<Allele> inputs;
|
||||||
|
List<Allele> expected;
|
||||||
|
|
||||||
|
@Requires("arg.length % 2 == 0")
|
||||||
|
private ClipAllelesTest(final int position, final int stop, final String ... arg) {
|
||||||
|
super(ClipAllelesTest.class);
|
||||||
|
this.position = position;
|
||||||
|
this.stop = stop;
|
||||||
|
this.ref = arg[0];
|
||||||
|
|
||||||
|
int n = arg.length / 2;
|
||||||
|
inputs = new ArrayList<Allele>(n);
|
||||||
|
expected = new ArrayList<Allele>(n);
|
||||||
|
|
||||||
|
for ( int i = 0; i < n; i++ ) {
|
||||||
|
final boolean ref = i % n == 0;
|
||||||
|
inputs.add(Allele.create(arg[i], ref));
|
||||||
|
}
|
||||||
|
for ( int i = n; i < arg.length; i++ ) {
|
||||||
|
final boolean ref = i % n == 0;
|
||||||
|
expected.add(Allele.create(arg[i], ref));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isClipped() {
|
||||||
|
for ( int i = 0; i < inputs.size(); i++ ) {
|
||||||
|
if ( inputs.get(i).length() != expected.get(i).length() )
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return String.format("ClipAllelesTest input=%s expected=%s", inputs, expected);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@DataProvider(name = "ClipAllelesTest")
|
||||||
|
public Object[][] makeClipAllelesTest() {
|
||||||
|
// do no harm
|
||||||
|
new ClipAllelesTest(10, 10, "A", "A");
|
||||||
|
new ClipAllelesTest(10, 10, "A", "C", "A", "C");
|
||||||
|
new ClipAllelesTest(10, 10, "A", "C", "G", "A", "C", "G");
|
||||||
|
|
||||||
|
// insertions
|
||||||
|
new ClipAllelesTest(10, 10, "A", "AA", "-", "A");
|
||||||
|
new ClipAllelesTest(10, 10, "A", "AAA", "-", "AA");
|
||||||
|
new ClipAllelesTest(10, 10, "A", "AG", "-", "G");
|
||||||
|
|
||||||
|
// deletions
|
||||||
|
new ClipAllelesTest(10, 11, "AA", "A", "A", "-");
|
||||||
|
new ClipAllelesTest(10, 12, "AAA", "A", "AA", "-");
|
||||||
|
new ClipAllelesTest(10, 11, "AG", "A", "G", "-");
|
||||||
|
new ClipAllelesTest(10, 12, "AGG", "A", "GG", "-");
|
||||||
|
|
||||||
|
// multi-allelic insertion and deletions
|
||||||
|
new ClipAllelesTest(10, 11, "AA", "A", "AAA", "A", "-", "AA");
|
||||||
|
new ClipAllelesTest(10, 11, "AA", "A", "AAG", "A", "-", "AG");
|
||||||
|
new ClipAllelesTest(10, 10, "A", "AA", "AAA", "-", "A", "AA");
|
||||||
|
new ClipAllelesTest(10, 10, "A", "AA", "ACA", "-", "A", "CA");
|
||||||
|
new ClipAllelesTest(10, 12, "ACG", "ATC", "AGG", "CG", "TC", "GG");
|
||||||
|
new ClipAllelesTest(10, 11, "AC", "AT", "AG", "C", "T", "G");
|
||||||
|
|
||||||
|
// cannot be clipped
|
||||||
|
new ClipAllelesTest(10, 11, "AC", "CT", "AG", "AC", "CT", "AG");
|
||||||
|
new ClipAllelesTest(10, 11, "AC", "CT", "GG", "AC", "CT", "GG");
|
||||||
|
|
||||||
|
// symbolic
|
||||||
|
new ClipAllelesTest(10, 100, "A", "<DEL>", "A", "<DEL>");
|
||||||
|
new ClipAllelesTest(50, 50, "G", "G]22:60]", "G", "G]22:60]");
|
||||||
|
new ClipAllelesTest(51, 51, "T", "]22:55]T", "T", "]22:55]T");
|
||||||
|
new ClipAllelesTest(52, 52, "C", "C[22:51[", "C", "C[22:51[");
|
||||||
|
new ClipAllelesTest(60, 60, "A", "A]22:50]", "A", "A]22:50]");
|
||||||
|
|
||||||
|
// symbolic with alleles that should be clipped
|
||||||
|
new ClipAllelesTest(10, 100, "A", "<DEL>", "AA", "-", "<DEL>", "A");
|
||||||
|
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "A", "<DEL>", "-");
|
||||||
|
new ClipAllelesTest(10, 100, "AA", "<DEL>", "A", "AAA", "A", "<DEL>", "-", "AA");
|
||||||
|
new ClipAllelesTest(10, 100, "AG", "<DEL>", "A", "AGA", "G", "<DEL>", "-", "GA");
|
||||||
|
new ClipAllelesTest(10, 100, "G", "<DEL>", "A", "G", "<DEL>", "A");
|
||||||
|
|
||||||
|
// clipping from both ends
|
||||||
|
//
|
||||||
|
// TODO -- THIS CODE IS BROKEN BECAUSE CLIPPING DOES WORK WITH ALLELES CLIPPED FROM THE END
|
||||||
|
//
|
||||||
|
// new ClipAllelesTest(10, 10, "ATA", "ATTA", "-", "T");
|
||||||
|
// new ClipAllelesTest(10, 10, "ATAA", "ATTAA", "-", "T");
|
||||||
|
// new ClipAllelesTest(10, 10, "ATAAG", "ATTAAG", "-", "T");
|
||||||
|
// new ClipAllelesTest(10, 11, "GTA", "ATTA", "G", "AT");
|
||||||
|
// new ClipAllelesTest(10, 11, "GTAA", "ATTAA", "G", "AT");
|
||||||
|
// new ClipAllelesTest(10, 11, "GTAAG", "ATTAAG", "G", "AT");
|
||||||
|
|
||||||
|
// complex substitutions
|
||||||
|
new ClipAllelesTest(10, 10, "A", "GA", "A", "GA");
|
||||||
|
|
||||||
|
return ClipAllelesTest.getTests(ClipAllelesTest.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "ClipAllelesTest")
|
||||||
|
public void testClipAllelesTest(ClipAllelesTest cfg) {
|
||||||
|
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
|
||||||
|
Assert.assertNull(clipped.getError(), "Unexpected error occurred");
|
||||||
|
Assert.assertEquals(clipped.getStop(), cfg.stop, "Clipped alleles stop");
|
||||||
|
Assert.assertEquals(clipped.getClippedAlleles(), cfg.expected, "Clipped alleles");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test(dataProvider = "ClipAllelesTest", dependsOnMethods = "testClipAllelesTest")
|
||||||
|
public void testPaddingAllelesInVC(final ClipAllelesTest cfg) {
|
||||||
|
final VCFAlleleClipper.ClippedAlleles clipped = VCFAlleleClipper.clipAlleles(cfg.position, cfg.ref, cfg.inputs, cfg.stop);
|
||||||
|
final VariantContext vc = new VariantContextBuilder("x", "1", cfg.position, cfg.stop, clipped.getClippedAlleles())
|
||||||
|
.referenceBaseForIndel(clipped.getRefBaseForIndel()).make();
|
||||||
|
|
||||||
|
if ( vc.isMixed() && vc.hasSymbolicAlleles() )
|
||||||
|
throw new SkipException("GATK cannot handle mixed variant contexts with symbolic and concrete alleles. Remove this check when allele clipping and padding is generalized");
|
||||||
|
|
||||||
|
Assert.assertEquals(VCFAlleleClipper.needsPadding(vc), cfg.isClipped(), "needPadding method");
|
||||||
|
|
||||||
|
if ( cfg.isClipped() ) {
|
||||||
|
// TODO
|
||||||
|
// TODO note that the GATK currently uses a broken approach to the clipped alleles, so the expected stop is
|
||||||
|
// TODO actually the original stop, as the original stop is +1 its true size.
|
||||||
|
// TODO
|
||||||
|
final int expectedStop = vc.getEnd(); // + (vc.hasSymbolicAlleles() ? 0 : 1);
|
||||||
|
|
||||||
|
final VariantContext padded = VCFAlleleClipper.createVariantContextWithPaddedAlleles(vc);
|
||||||
|
Assert.assertEquals(padded.getStart(), vc.getStart(), "padded VC start");
|
||||||
|
Assert.assertEquals(padded.getAlleles(), cfg.inputs, "padded VC alleles == original unclipped alleles");
|
||||||
|
Assert.assertEquals(padded.getEnd(), expectedStop, "padded VC end should be clipped VC + 1 (added a base to ref allele)");
|
||||||
|
Assert.assertFalse(VCFAlleleClipper.needsPadding(padded), "padded VC shouldn't need padding again");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// basic allele clipping test
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class ReverseClippingPositionTestProvider extends TestDataProvider {
|
||||||
|
final String ref;
|
||||||
|
final List<Allele> alleles = new ArrayList<Allele>();
|
||||||
|
final int expectedClip;
|
||||||
|
|
||||||
|
private ReverseClippingPositionTestProvider(final int expectedClip, final String ref, final String... alleles) {
|
||||||
|
super(ReverseClippingPositionTestProvider.class);
|
||||||
|
this.ref = ref;
|
||||||
|
for ( final String allele : alleles )
|
||||||
|
this.alleles.add(Allele.create(allele));
|
||||||
|
this.expectedClip = expectedClip;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@DataProvider(name = "ReverseClippingPositionTestProvider")
|
||||||
|
public Object[][] makeReverseClippingPositionTestProvider() {
|
||||||
|
// pair clipping
|
||||||
|
new ReverseClippingPositionTestProvider(0, "ATT", "CCG");
|
||||||
|
new ReverseClippingPositionTestProvider(1, "ATT", "CCT");
|
||||||
|
new ReverseClippingPositionTestProvider(2, "ATT", "CTT");
|
||||||
|
new ReverseClippingPositionTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
|
||||||
|
|
||||||
|
// triplets
|
||||||
|
new ReverseClippingPositionTestProvider(0, "ATT", "CTT", "CGG");
|
||||||
|
new ReverseClippingPositionTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
|
||||||
|
new ReverseClippingPositionTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
|
||||||
|
|
||||||
|
return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test(dataProvider = "ReverseClippingPositionTestProvider")
|
||||||
|
public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
|
||||||
|
int result = VCFAlleleClipper.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
|
||||||
|
Assert.assertEquals(result, cfg.expectedClip);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,91 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2012, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// our package
|
|
||||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
|
||||||
|
|
||||||
|
|
||||||
// the imports for unit testing.
|
|
||||||
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
|
||||||
import org.testng.Assert;
|
|
||||||
import org.testng.annotations.BeforeSuite;
|
|
||||||
import org.testng.annotations.DataProvider;
|
|
||||||
import org.testng.annotations.Test;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
|
|
||||||
public class VCFCodecUnitTest extends BaseTest {
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Provider
|
|
||||||
//
|
|
||||||
// --------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
private class AlleleClippingTestProvider extends TestDataProvider {
|
|
||||||
final String ref;
|
|
||||||
final List<Allele> alleles = new ArrayList<Allele>();
|
|
||||||
final int expectedClip;
|
|
||||||
|
|
||||||
private AlleleClippingTestProvider(final int expectedClip, final String ref, final String ... alleles) {
|
|
||||||
super(AlleleClippingTestProvider.class);
|
|
||||||
this.ref = ref;
|
|
||||||
for ( final String allele : alleles )
|
|
||||||
this.alleles.add(Allele.create(allele));
|
|
||||||
this.expectedClip = expectedClip;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@DataProvider(name = "AlleleClippingTestProvider")
|
|
||||||
public Object[][] MakeAlleleClippingTest() {
|
|
||||||
// pair clipping
|
|
||||||
new AlleleClippingTestProvider(0, "ATT", "CCG");
|
|
||||||
new AlleleClippingTestProvider(1, "ATT", "CCT");
|
|
||||||
new AlleleClippingTestProvider(2, "ATT", "CTT");
|
|
||||||
new AlleleClippingTestProvider(2, "ATT", "ATT"); // cannot completely clip allele
|
|
||||||
|
|
||||||
// triplets
|
|
||||||
new AlleleClippingTestProvider(0, "ATT", "CTT", "CGG");
|
|
||||||
new AlleleClippingTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
|
|
||||||
new AlleleClippingTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go
|
|
||||||
|
|
||||||
return AlleleClippingTestProvider.getTests(AlleleClippingTestProvider.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@Test(dataProvider = "AlleleClippingTestProvider")
|
|
||||||
public void TestAlleleClipping(AlleleClippingTestProvider cfg) {
|
|
||||||
int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false, 1);
|
|
||||||
Assert.assertEquals(result, cfg.expectedClip);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -26,7 +26,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
executeTest("Test Variants To VCF from new output", spec2);
|
executeTest("Test Variants To VCF from new output", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
// See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic
|
// See https://getsatisfaction.com/gsa/topics/support_vcf_4_1_structural_variation_breakend_alleles?utm_content=topic_link&utm_medium=email&utm_source=new_topic
|
||||||
public void testReadingAndWritingBreakpointAlleles() {
|
public void testReadingAndWritingBreakpointAlleles() {
|
||||||
String testVCF = privateTestDir + "breakpoint-example.vcf";
|
String testVCF = privateTestDir + "breakpoint-example.vcf";
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,26 @@ public class GenotypeUnitTest extends BaseTest {
|
||||||
T = Allele.create("T");
|
T = Allele.create("T");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final GenotypeBuilder makeGB() {
|
||||||
|
return new GenotypeBuilder("misc");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFilters() {
|
||||||
|
Assert.assertFalse(makeGB().make().isFiltered(), "by default Genotypes must be PASS");
|
||||||
|
Assert.assertNull(makeGB().make().getFilters(), "by default Genotypes must be PASS => getFilters() == null");
|
||||||
|
Assert.assertFalse(makeGB().filter(null).make().isFiltered(), "setting filter == null => Genotypes must be PASS");
|
||||||
|
Assert.assertNull(makeGB().filter(null).make().getFilters(), "Genotypes PASS => getFilters == null");
|
||||||
|
Assert.assertFalse(makeGB().filter("PASS").make().isFiltered(), "setting filter == PASS => Genotypes must be PASS");
|
||||||
|
Assert.assertNull(makeGB().filter("PASS").make().getFilters(), "Genotypes PASS => getFilters == null");
|
||||||
|
Assert.assertTrue(makeGB().filter("x").make().isFiltered(), "setting filter != null => Genotypes must be PASS");
|
||||||
|
Assert.assertEquals(makeGB().filter("x").make().getFilters(), "x", "Should get back the expected filter string");
|
||||||
|
Assert.assertEquals(makeGB().filters("x", "y").make().getFilters(), "x;y", "Multiple filter field values should be joined with ;");
|
||||||
|
Assert.assertEquals(makeGB().filters("x", "y", "z").make().getFilters(), "x;y;z", "Multiple filter field values should be joined with ;");
|
||||||
|
Assert.assertTrue(makeGB().filters("x", "y", "z").make().isFiltered(), "Multiple filter values should be filtered");
|
||||||
|
Assert.assertEquals(makeGB().filter("x;y;z").make().getFilters(), "x;y;z", "Multiple filter field values should be joined with ;");
|
||||||
|
}
|
||||||
|
|
||||||
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
|
||||||
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased, double[] log10Likelihoods) {
|
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased, double[] log10Likelihoods) {
|
||||||
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, double[] log10Likelihoods)
|
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, double[] log10Likelihoods)
|
||||||
|
|
|
||||||
|
|
@ -56,7 +56,7 @@ public class VariantContextTestProvider {
|
||||||
final private static boolean ENABLE_VARARRAY_TESTS = true;
|
final private static boolean ENABLE_VARARRAY_TESTS = true;
|
||||||
final private static boolean ENABLE_PLOIDY_TESTS = true;
|
final private static boolean ENABLE_PLOIDY_TESTS = true;
|
||||||
final private static boolean ENABLE_PL_TESTS = true;
|
final private static boolean ENABLE_PL_TESTS = true;
|
||||||
final private static boolean ENABLE_SYMBOLIC_ALLELE_TESTS = false;
|
final private static boolean ENABLE_SYMBOLIC_ALLELE_TESTS = true;
|
||||||
final private static boolean ENABLE_SOURCE_VCF_TESTS = true;
|
final private static boolean ENABLE_SOURCE_VCF_TESTS = true;
|
||||||
final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = true;
|
final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = true;
|
||||||
final private static List<Integer> TWENTY_INTS = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
|
final private static List<Integer> TWENTY_INTS = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
|
||||||
|
|
@ -70,8 +70,10 @@ public class VariantContextTestProvider {
|
||||||
testSourceVCFs.add(new File(BaseTest.privateTestDir + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"));
|
testSourceVCFs.add(new File(BaseTest.privateTestDir + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf"));
|
||||||
testSourceVCFs.add(new File(BaseTest.privateTestDir + "ex2.vcf"));
|
testSourceVCFs.add(new File(BaseTest.privateTestDir + "ex2.vcf"));
|
||||||
testSourceVCFs.add(new File(BaseTest.privateTestDir + "dbsnp_135.b37.1000.vcf"));
|
testSourceVCFs.add(new File(BaseTest.privateTestDir + "dbsnp_135.b37.1000.vcf"));
|
||||||
if ( ENABLE_SYMBOLIC_ALLELE_TESTS )
|
if ( ENABLE_SYMBOLIC_ALLELE_TESTS ) {
|
||||||
testSourceVCFs.add(new File(BaseTest.privateTestDir + "diagnosis_targets_testfile.vcf"));
|
testSourceVCFs.add(new File(BaseTest.privateTestDir + "diagnosis_targets_testfile.vcf"));
|
||||||
|
testSourceVCFs.add(new File(BaseTest.privateTestDir + "VQSR.mixedTest.recal"));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public abstract static class VariantContextIOTest {
|
public abstract static class VariantContextIOTest {
|
||||||
|
|
@ -181,6 +183,7 @@ public class VariantContextTestProvider {
|
||||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||||
|
|
||||||
addHeaderLine(metaData, "STRING1", 1, VCFHeaderLineType.String);
|
addHeaderLine(metaData, "STRING1", 1, VCFHeaderLineType.String);
|
||||||
|
addHeaderLine(metaData, "END", 1, VCFHeaderLineType.Integer);
|
||||||
addHeaderLine(metaData, "STRING3", 3, VCFHeaderLineType.String);
|
addHeaderLine(metaData, "STRING3", 3, VCFHeaderLineType.String);
|
||||||
addHeaderLine(metaData, "STRING20", 20, VCFHeaderLineType.String);
|
addHeaderLine(metaData, "STRING20", 20, VCFHeaderLineType.String);
|
||||||
addHeaderLine(metaData, "VAR.INFO.STRING", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
|
addHeaderLine(metaData, "VAR.INFO.STRING", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
|
||||||
|
|
@ -191,7 +194,7 @@ public class VariantContextTestProvider {
|
||||||
addHeaderLine(metaData, "PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer);
|
addHeaderLine(metaData, "PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer);
|
||||||
addHeaderLine(metaData, "GS", 2, VCFHeaderLineType.String);
|
addHeaderLine(metaData, "GS", 2, VCFHeaderLineType.String);
|
||||||
addHeaderLine(metaData, "GV", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
|
addHeaderLine(metaData, "GV", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
|
||||||
addHeaderLine(metaData, "FT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
|
addHeaderLine(metaData, "FT", 1, VCFHeaderLineType.String);
|
||||||
|
|
||||||
// prep the header
|
// prep the header
|
||||||
metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
|
metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
|
||||||
|
|
@ -283,6 +286,15 @@ public class VariantContextTestProvider {
|
||||||
|
|
||||||
if ( ENABLE_A_AND_G_TESTS )
|
if ( ENABLE_A_AND_G_TESTS )
|
||||||
addGenotypesAndGTests();
|
addGenotypesAndGTests();
|
||||||
|
|
||||||
|
if ( ENABLE_SYMBOLIC_ALLELE_TESTS )
|
||||||
|
addSymbolicAlleleTests();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addSymbolicAlleleTests() {
|
||||||
|
// two tests to ensure that the end is computed correctly when there's (and not) an END field present
|
||||||
|
add(builder().alleles("N", "<VQSR>").start(10).stop(11).attribute("END", 11));
|
||||||
|
add(builder().alleles("N", "<VQSR>").start(10).stop(10));
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void addGenotypesToTestData() {
|
private static void addGenotypesToTestData() {
|
||||||
|
|
@ -509,27 +521,26 @@ public class VariantContextTestProvider {
|
||||||
addGenotypeTests(site, // missing value in varlist of string
|
addGenotypeTests(site, // missing value in varlist of string
|
||||||
attr("g1", ref, "FLOAT1", 1.0),
|
attr("g1", ref, "FLOAT1", 1.0),
|
||||||
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
|
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
|
||||||
|
|
||||||
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// TESTING GENOTYPE FILTERS
|
|
||||||
//
|
|
||||||
//
|
|
||||||
addGenotypeTests(site,
|
|
||||||
new GenotypeBuilder("g1-x", Arrays.asList(ref, ref)).filters("X").make(),
|
|
||||||
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
|
|
||||||
addGenotypeTests(site,
|
|
||||||
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
|
|
||||||
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
|
|
||||||
addGenotypeTests(site,
|
|
||||||
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
|
|
||||||
new GenotypeBuilder("g2-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
|
||||||
addGenotypeTests(site,
|
|
||||||
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
|
|
||||||
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
|
|
||||||
new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// TESTING GENOTYPE FILTERS
|
||||||
|
//
|
||||||
|
//
|
||||||
|
addGenotypeTests(site,
|
||||||
|
new GenotypeBuilder("g1-x", Arrays.asList(ref, ref)).filters("X").make(),
|
||||||
|
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
|
||||||
|
addGenotypeTests(site,
|
||||||
|
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||||
|
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
|
||||||
|
addGenotypeTests(site,
|
||||||
|
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||||
|
new GenotypeBuilder("g2-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||||
|
addGenotypeTests(site,
|
||||||
|
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||||
|
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
|
||||||
|
new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void addGenotypesAndGTests() {
|
private static void addGenotypesAndGTests() {
|
||||||
|
|
@ -711,14 +722,12 @@ public class VariantContextTestProvider {
|
||||||
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
|
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
|
||||||
|
|
||||||
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
|
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
|
||||||
Assert.assertEquals(actual.getFilters(), expected.getFilters(), "filters");
|
BaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
|
||||||
BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());
|
BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());
|
||||||
|
|
||||||
Assert.assertEquals(actual.hasGenotypes(), expected.hasGenotypes(), "hasGenotypes");
|
Assert.assertEquals(actual.hasGenotypes(), expected.hasGenotypes(), "hasGenotypes");
|
||||||
if ( expected.hasGenotypes() ) {
|
if ( expected.hasGenotypes() ) {
|
||||||
final Set<String> actualSampleSet = new HashSet<String>(actual.getSampleNames());
|
BaseTest.assertEqualsSet(actual.getSampleNames(), expected.getSampleNames(), "sample names set");
|
||||||
final Set<String> expectedSampleSet = new HashSet<String>(expected.getSampleNames());
|
|
||||||
Assert.assertTrue(actualSampleSet.equals(expectedSampleSet), "sample names"); // note this is necessary due to testng bug for set comps
|
|
||||||
Assert.assertEquals(actual.getSampleNamesOrderedByName(), expected.getSampleNamesOrderedByName(), "sample names");
|
Assert.assertEquals(actual.getSampleNamesOrderedByName(), expected.getSampleNamesOrderedByName(), "sample names");
|
||||||
final Set<String> samples = expected.getSampleNames();
|
final Set<String> samples = expected.getSampleNames();
|
||||||
for ( final String sample : samples ) {
|
for ( final String sample : samples ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue