Move additional VariantContext utility methods back to the GATK
Thanks to Eric for his feedback
This commit is contained in:
parent
ff8ba03249
commit
591df2be44
|
|
@ -320,7 +320,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
||||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
||||||
// logger.debug("original VC = " + vc);
|
// logger.debug("original VC = " + vc);
|
||||||
// logger.debug("sub VC = " + subvc);
|
// logger.debug("sub VC = " + subvc);
|
||||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
return GATKVariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {
|
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {
|
||||||
|
|
|
||||||
|
|
@ -280,7 +280,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
||||||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||||
|
|
||||||
if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
|
if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
|
||||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
Map<VariantContext.Type, List<VariantContext>> VCsByType = GATKVariantContextUtils.separateVariantContextsByType(vcs);
|
||||||
|
|
||||||
// TODO -- clean this up in a refactoring
|
// TODO -- clean this up in a refactoring
|
||||||
// merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type)
|
// merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type)
|
||||||
|
|
@ -320,7 +320,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
||||||
// re-compute chromosome counts
|
// re-compute chromosome counts
|
||||||
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||||
if ( minimalVCF )
|
if ( minimalVCF )
|
||||||
VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
|
GATKVariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
|
||||||
vcfWriter.add(builder.make());
|
vcfWriter.add(builder.make());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,11 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
import com.google.java.contract.Ensures;
|
|
||||||
import com.google.java.contract.Invariant;
|
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||||
import org.broadinstitute.variant.variantcontext.*;
|
import org.broadinstitute.variant.variantcontext.*;
|
||||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||||
|
|
||||||
|
|
@ -277,8 +275,8 @@ public class ConcordanceMetrics {
|
||||||
if ( truth.isMonomorphicInSamples() )
|
if ( truth.isMonomorphicInSamples() )
|
||||||
return EVAL_ONLY;
|
return EVAL_ONLY;
|
||||||
|
|
||||||
boolean evalSubsetTruth = VariantContextUtils.allelesAreSubset(eval,truth);
|
boolean evalSubsetTruth = GATKVariantContextUtils.allelesAreSubset(eval, truth);
|
||||||
boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truth,eval);
|
boolean truthSubsetEval = GATKVariantContextUtils.allelesAreSubset(truth, eval);
|
||||||
|
|
||||||
if ( evalSubsetTruth && truthSubsetEval )
|
if ( evalSubsetTruth && truthSubsetEval )
|
||||||
return ALLELES_MATCH;
|
return ALLELES_MATCH;
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
|
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
|
||||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||||
|
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||||
import org.broadinstitute.variant.vcf.*;
|
import org.broadinstitute.variant.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
|
@ -246,7 +247,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
||||||
vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
|
vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
|
||||||
}
|
}
|
||||||
|
|
||||||
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
|
vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
|
||||||
vcfwriter.add(vc);
|
vcfwriter.add(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,7 @@ public class GATKVariantContextUtils {
|
||||||
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||||
public final static String MERGE_INTERSECTION = "Intersection";
|
public final static String MERGE_INTERSECTION = "Intersection";
|
||||||
|
|
||||||
|
|
||||||
public enum GenotypeMergeType {
|
public enum GenotypeMergeType {
|
||||||
/**
|
/**
|
||||||
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
|
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
|
||||||
|
|
@ -1013,6 +1014,124 @@ public class GATKVariantContextUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Builds a new attribute map containing only the requested keys that are present on {@code igc}.
 *
 * @param igc            the attribute holder, queried through {@code hasAttribute} / {@code getAttribute}
 * @param keysToPreserve the keys to copy; must be non-null because its {@code size()} is read up front
 * @return a newly allocated {@code HashMap} with one entry per key in {@code keysToPreserve} that {@code igc} reports having
 */
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
|
||||||
|
// initial capacity is the number of requested keys (an upper bound on the entries added below)
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
|
||||||
|
for ( final String key : keysToPreserve ) {
|
||||||
|
// keys that igc does not have are skipped rather than stored with a null value
if ( igc.hasAttribute(key) )
|
||||||
|
attributes.put(key, igc.getAttribute(key));
|
||||||
|
}
|
||||||
|
return attributes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
* Convenience overload: wraps {@code vc} in a new {@code VariantContextBuilder}, delegates to the
* builder-based {@code pruneVariantContext}, and calls {@code make()} on the result.
*
|
||||||
|
* @deprecated use variant context builder version instead
|
||||||
|
* @param vc the variant context
|
||||||
|
* @param keysToPreserve the keys to preserve (passed through unchanged to the builder version)
|
||||||
|
* @return a pruned version of the original variant context
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
|
||||||
|
return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Reduces the context described by {@code builder} to a subset of its attributes and to stripped-down genotypes.
 *
 * The current state of the builder is materialized with {@code make()}; only the attributes whose keys are in
 * {@code keysToPreserve} are kept, every genotype is rebuilt without AD, DP, PL and extended attributes, and
 * both results are set back on the builder that was passed in.
 *
 * @param builder        the builder to prune; it is modified by this call
 * @param keysToPreserve the attribute keys to keep; {@code null} is treated as an empty list
 * @return the value returned by the chained {@code genotypes(...).attributes(...)} calls on {@code builder}
 *         (presumably the same builder instance -- confirm against VariantContextBuilder)
 */
public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection<String> keysToPreserve ) {
|
||||||
|
// snapshot of the builder's current state; used only for reading attributes and genotypes
final VariantContext vc = builder.make();
|
||||||
|
// a null key collection means "preserve nothing"
if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList();
|
||||||
|
|
||||||
|
// VC info
|
||||||
|
final Map<String, Object> attributes = subsetAttributes(vc.getCommonInfo(), keysToPreserve);
|
||||||
|
|
||||||
|
// Genotypes
|
||||||
|
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||||
|
for ( final Genotype g : vc.getGenotypes() ) {
|
||||||
|
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||||
|
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
|
||||||
|
gb.noAD().noDP().noPL().noAttributes();
|
||||||
|
genotypes.add(gb.make());
|
||||||
|
}
|
||||||
|
|
||||||
|
return builder.genotypes(genotypes).attributes(attributes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Tests whether the alternate alleles of {@code vc1} are all present among the alternate alleles of {@code vc2}.
 *
 * The two contexts must also have equal reference alleles (compared with {@code equals}); otherwise the
 * result is {@code false}. When {@code vc1} has no alternate alleles and the references match, the result
 * is {@code true}.
 *
 * @param vc1 the context whose alternate alleles must all be found
 * @param vc2 the context searched for those alleles
 * @return {@code true} if the references are equal and every alternate allele of {@code vc1} is contained in
 *         {@code vc2.getAlternateAlleles()}
 */
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
|
||||||
|
// if the references match and all alternate alleles of vc1 are contained in the alternate alleles of vc2, return true
|
||||||
|
if (!vc1.getReference().equals(vc2.getReference()))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for (Allele a :vc1.getAlternateAlleles()) {
|
||||||
|
if (!vc2.getAlternateAlleles().contains(a))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Groups variant contexts into lists keyed by {@code VariantContext.Type}, regrouping contexts of different
 * types when the alleles of one are a subset of the alleles of the other (as decided by {@code allelesAreSubset}).
 *
 * Contexts are processed in the iteration order of {@code VCs}. Each new context is compared against the
 * contexts already stored under every other type; the scan of one type's list stops at the first stored
 * context that matches in either direction, but the remaining types are still examined.
 *
 * @param VCs the variant contexts to group
 * @return a map from type to the contexts grouped under that type; a list that becomes empty is removed
 *         from the map rather than kept
 */
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||||
|
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||||
|
for ( VariantContext vc : VCs ) {
|
||||||
|
|
||||||
|
// look at previous variant contexts of different type. If:
|
||||||
|
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
|
||||||
|
// b) vc has alleles which are subset of otherVC: add vc to otherVC's type list and do not add vc to its own list
|
||||||
|
// c) neither: do nothing, just add vc to its own list
|
||||||
|
boolean addtoOwnList = true;
|
||||||
|
for (VariantContext.Type type : VariantContext.Type.values()) {
|
||||||
|
// only lists of a different type than vc are candidates for regrouping
if (type.equals(vc.getType()))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// nothing stored under this type yet
if (!mappedVCs.containsKey(type))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
List<VariantContext> vcList = mappedVCs.get(type);
|
||||||
|
for (int k=0; k < vcList.size(); k++) {
|
||||||
|
VariantContext otherVC = vcList.get(k);
|
||||||
|
if (allelesAreSubset(otherVC,vc)) {
|
||||||
|
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
|
||||||
|
// k is an int, so this removes the element at index k (i.e. otherVC)
vcList.remove(k);
|
||||||
|
// avoid having empty lists
|
||||||
|
if (vcList.size() == 0)
|
||||||
|
mappedVCs.remove(type);
|
||||||
|
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||||
|
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||||
|
mappedVCs.get(vc.getType()).add(otherVC);
|
||||||
|
// leaves only the scan of this type's list; addtoOwnList stays true, so vc itself is still added below
break;
|
||||||
|
}
|
||||||
|
else if (allelesAreSubset(vc,otherVC)) {
|
||||||
|
// vc has a type different than otherVC and its alleles are a subset of otherVC: add vc to otherVC's type list and don't add to its own
|
||||||
|
mappedVCs.get(type).add(vc);
|
||||||
|
addtoOwnList = false;
|
||||||
|
// leaves only the scan of this type's list; the remaining types are still checked
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (addtoOwnList) {
|
||||||
|
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||||
|
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||||
|
mappedVCs.get(vc.getType()).add(vc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mappedVCs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Produces a variant context whose genotypes keep only the extended attributes named in {@code allowedAttributes}.
 *
 * @param vc                the variant context to filter
 * @param allowedAttributes the extended-attribute keys to keep; when {@code null}, no filtering is done
 * @return {@code vc} itself when {@code allowedAttributes} is {@code null}; otherwise the context made by a
 *         {@code VariantContextBuilder} seeded from {@code vc} with the filtered genotypes
 */
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
|
||||||
|
// null means "no restriction": hand back the input untouched
if ( allowedAttributes == null )
|
||||||
|
return vc;
|
||||||
|
|
||||||
|
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
|
||||||
|
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||||
|
// collect only the extended attributes whose key is allowed
Map<String, Object> attrs = new HashMap<String, Object>();
|
||||||
|
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
|
||||||
|
if ( allowedAttributes.contains(attr.getKey()) )
|
||||||
|
attrs.put(attr.getKey(), attr.getValue());
|
||||||
|
}
|
||||||
|
// rebuild the genotype from the original, replacing its attributes with the filtered map
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
|
||||||
|
}
|
||||||
|
|
||||||
|
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private static class AlleleMapper {
|
private static class AlleleMapper {
|
||||||
private VariantContext vc = null;
|
private VariantContext vc = null;
|
||||||
private Map<Allele, Allele> map = null;
|
private Map<Allele, Allele> map = null;
|
||||||
|
|
|
||||||
|
|
@ -625,6 +625,10 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
||||||
|
|
||||||
|
/**
 * Exposes the {@code CommonInfo} object that this context's attribute accessors delegate to.
 *
 * @return the {@code commonInfo} field itself (no copy is made)
 */
public CommonInfo getCommonInfo() {
|
||||||
|
return commonInfo;
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Working with alleles
|
// Working with alleles
|
||||||
|
|
|
||||||
|
|
@ -316,106 +316,6 @@ public class VariantContextUtils {
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
|
|
||||||
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
|
|
||||||
for ( final String key : keysToPreserve ) {
|
|
||||||
if ( igc.hasAttribute(key) )
|
|
||||||
attributes.put(key, igc.getAttribute(key));
|
|
||||||
}
|
|
||||||
return attributes;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @deprecated use variant context builder version instead
|
|
||||||
* @param vc the variant context
|
|
||||||
* @param keysToPreserve the keys to preserve
|
|
||||||
* @return a pruned version of the original variant context
|
|
||||||
*/
|
|
||||||
@Deprecated
|
|
||||||
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
|
|
||||||
return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection<String> keysToPreserve ) {
|
|
||||||
final VariantContext vc = builder.make();
|
|
||||||
if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList();
|
|
||||||
|
|
||||||
// VC info
|
|
||||||
final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve);
|
|
||||||
|
|
||||||
// Genotypes
|
|
||||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
|
||||||
for ( final Genotype g : vc.getGenotypes() ) {
|
|
||||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
|
||||||
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
|
|
||||||
gb.noAD().noDP().noPL().noAttributes();
|
|
||||||
genotypes.add(gb.make());
|
|
||||||
}
|
|
||||||
|
|
||||||
return builder.genotypes(genotypes).attributes(attributes);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
|
|
||||||
// if all alleles of vc1 are a contained in alleles of vc2, return true
|
|
||||||
if (!vc1.getReference().equals(vc2.getReference()))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
for (Allele a :vc1.getAlternateAlleles()) {
|
|
||||||
if (!vc2.getAlternateAlleles().contains(a))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
|
||||||
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
|
||||||
for ( VariantContext vc : VCs ) {
|
|
||||||
|
|
||||||
// look at previous variant contexts of different type. If:
|
|
||||||
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
|
|
||||||
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
|
|
||||||
// c) neither: do nothing, just add vc to its own list
|
|
||||||
boolean addtoOwnList = true;
|
|
||||||
for (VariantContext.Type type : VariantContext.Type.values()) {
|
|
||||||
if (type.equals(vc.getType()))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (!mappedVCs.containsKey(type))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
List<VariantContext> vcList = mappedVCs.get(type);
|
|
||||||
for (int k=0; k < vcList.size(); k++) {
|
|
||||||
VariantContext otherVC = vcList.get(k);
|
|
||||||
if (allelesAreSubset(otherVC,vc)) {
|
|
||||||
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
|
|
||||||
vcList.remove(k);
|
|
||||||
// avoid having empty lists
|
|
||||||
if (vcList.size() == 0)
|
|
||||||
mappedVCs.remove(type);
|
|
||||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
|
||||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
|
||||||
mappedVCs.get(vc.getType()).add(otherVC);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
else if (allelesAreSubset(vc,otherVC)) {
|
|
||||||
// vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
|
|
||||||
mappedVCs.get(type).add(vc);
|
|
||||||
addtoOwnList = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (addtoOwnList) {
|
|
||||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
|
||||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
|
||||||
mappedVCs.get(vc.getType()).add(vc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return mappedVCs;
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: remove that after testing
|
// TODO: remove that after testing
|
||||||
// static private void verifyUniqueSampleNames(Collection<VariantContext> unsortedVCs) {
|
// static private void verifyUniqueSampleNames(Collection<VariantContext> unsortedVCs) {
|
||||||
// Set<String> names = new HashSet<String>();
|
// Set<String> names = new HashSet<String>();
|
||||||
|
|
@ -431,23 +331,6 @@ public class VariantContextUtils {
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
|
||||||
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
|
|
||||||
if ( allowedAttributes == null )
|
|
||||||
return vc;
|
|
||||||
|
|
||||||
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
|
|
||||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
|
||||||
Map<String, Object> attrs = new HashMap<String, Object>();
|
|
||||||
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
|
|
||||||
if ( allowedAttributes.contains(attr.getKey()) )
|
|
||||||
attrs.put(attr.getKey(), attr.getValue());
|
|
||||||
}
|
|
||||||
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
|
|
||||||
}
|
|
||||||
|
|
||||||
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int getSize( VariantContext vc ) {
|
public static int getSize( VariantContext vc ) {
|
||||||
return vc.getEnd() - vc.getStart() + 1;
|
return vc.getEnd() - vc.getStart() + 1;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue