Move additional VariantContext utility methods back to the GATK
Thanks to Eric for his feedback
This commit is contained in:
parent
ff8ba03249
commit
591df2be44
|
|
@ -320,7 +320,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
||||
// logger.debug("original VC = " + vc);
|
||||
// logger.debug("sub VC = " + subvc);
|
||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||
return GATKVariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||
}
|
||||
|
||||
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {
|
||||
|
|
|
|||
|
|
@ -280,7 +280,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
|
||||
|
||||
if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
|
||||
Map<VariantContext.Type, List<VariantContext>> VCsByType = GATKVariantContextUtils.separateVariantContextsByType(vcs);
|
||||
|
||||
// TODO -- clean this up in a refactoring
|
||||
// merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type)
|
||||
|
|
@ -320,7 +320,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
// re-compute chromosome counts
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, false);
|
||||
if ( minimalVCF )
|
||||
VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
|
||||
GATKVariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
|
||||
vcfWriter.add(builder.make());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,11 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
||||
|
|
@ -277,8 +275,8 @@ public class ConcordanceMetrics {
|
|||
if ( truth.isMonomorphicInSamples() )
|
||||
return EVAL_ONLY;
|
||||
|
||||
boolean evalSubsetTruth = VariantContextUtils.allelesAreSubset(eval,truth);
|
||||
boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truth,eval);
|
||||
boolean evalSubsetTruth = GATKVariantContextUtils.allelesAreSubset(eval, truth);
|
||||
boolean truthSubsetEval = GATKVariantContextUtils.allelesAreSubset(truth, eval);
|
||||
|
||||
if ( evalSubsetTruth && truthSubsetEval )
|
||||
return ALLELES_MATCH;
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
|
@ -246,7 +247,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
|
|||
vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
|
||||
}
|
||||
|
||||
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
|
||||
vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
|
||||
vcfwriter.add(vc);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ public class GATKVariantContextUtils {
|
|||
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
|
||||
public final static String MERGE_INTERSECTION = "Intersection";
|
||||
|
||||
|
||||
public enum GenotypeMergeType {
|
||||
/**
|
||||
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
|
||||
|
|
@ -1013,6 +1014,124 @@ public class GATKVariantContextUtils {
|
|||
}
|
||||
}
|
||||
|
||||
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
|
||||
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
|
||||
for ( final String key : keysToPreserve ) {
|
||||
if ( igc.hasAttribute(key) )
|
||||
attributes.put(key, igc.getAttribute(key));
|
||||
}
|
||||
return attributes;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated use variant context builder version instead
|
||||
* @param vc the variant context
|
||||
* @param keysToPreserve the keys to preserve
|
||||
* @return a pruned version of the original variant context
|
||||
*/
|
||||
@Deprecated
|
||||
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
|
||||
return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
|
||||
}
|
||||
|
||||
public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection<String> keysToPreserve ) {
|
||||
final VariantContext vc = builder.make();
|
||||
if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList();
|
||||
|
||||
// VC info
|
||||
final Map<String, Object> attributes = subsetAttributes(vc.getCommonInfo(), keysToPreserve);
|
||||
|
||||
// Genotypes
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
|
||||
gb.noAD().noDP().noPL().noAttributes();
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
return builder.genotypes(genotypes).attributes(attributes);
|
||||
}
|
||||
|
||||
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
|
||||
// if all alleles of vc1 are a contained in alleles of vc2, return true
|
||||
if (!vc1.getReference().equals(vc2.getReference()))
|
||||
return false;
|
||||
|
||||
for (Allele a :vc1.getAlternateAlleles()) {
|
||||
if (!vc2.getAlternateAlleles().contains(a))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||
for ( VariantContext vc : VCs ) {
|
||||
|
||||
// look at previous variant contexts of different type. If:
|
||||
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
|
||||
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
|
||||
// c) neither: do nothing, just add vc to its own list
|
||||
boolean addtoOwnList = true;
|
||||
for (VariantContext.Type type : VariantContext.Type.values()) {
|
||||
if (type.equals(vc.getType()))
|
||||
continue;
|
||||
|
||||
if (!mappedVCs.containsKey(type))
|
||||
continue;
|
||||
|
||||
List<VariantContext> vcList = mappedVCs.get(type);
|
||||
for (int k=0; k < vcList.size(); k++) {
|
||||
VariantContext otherVC = vcList.get(k);
|
||||
if (allelesAreSubset(otherVC,vc)) {
|
||||
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
|
||||
vcList.remove(k);
|
||||
// avoid having empty lists
|
||||
if (vcList.size() == 0)
|
||||
mappedVCs.remove(type);
|
||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vc.getType()).add(otherVC);
|
||||
break;
|
||||
}
|
||||
else if (allelesAreSubset(vc,otherVC)) {
|
||||
// vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
|
||||
mappedVCs.get(type).add(vc);
|
||||
addtoOwnList = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (addtoOwnList) {
|
||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vc.getType()).add(vc);
|
||||
}
|
||||
}
|
||||
|
||||
return mappedVCs;
|
||||
}
|
||||
|
||||
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
|
||||
if ( allowedAttributes == null )
|
||||
return vc;
|
||||
|
||||
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>();
|
||||
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
|
||||
if ( allowedAttributes.contains(attr.getKey()) )
|
||||
attrs.put(attr.getKey(), attr.getValue());
|
||||
}
|
||||
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
|
||||
}
|
||||
|
||||
|
||||
private static class AlleleMapper {
|
||||
private VariantContext vc = null;
|
||||
private Map<Allele, Allele> map = null;
|
||||
|
|
|
|||
|
|
@ -625,6 +625,10 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
|
||||
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
|
||||
|
||||
public CommonInfo getCommonInfo() {
|
||||
return commonInfo;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Working with alleles
|
||||
|
|
|
|||
|
|
@ -316,106 +316,6 @@ public class VariantContextUtils {
|
|||
return r;
|
||||
}
|
||||
|
||||
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
|
||||
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
|
||||
for ( final String key : keysToPreserve ) {
|
||||
if ( igc.hasAttribute(key) )
|
||||
attributes.put(key, igc.getAttribute(key));
|
||||
}
|
||||
return attributes;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated use variant context builder version instead
|
||||
* @param vc the variant context
|
||||
* @param keysToPreserve the keys to preserve
|
||||
* @return a pruned version of the original variant context
|
||||
*/
|
||||
@Deprecated
|
||||
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
|
||||
return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
|
||||
}
|
||||
|
||||
public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection<String> keysToPreserve ) {
|
||||
final VariantContext vc = builder.make();
|
||||
if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList();
|
||||
|
||||
// VC info
|
||||
final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve);
|
||||
|
||||
// Genotypes
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
|
||||
gb.noAD().noDP().noPL().noAttributes();
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
return builder.genotypes(genotypes).attributes(attributes);
|
||||
}
|
||||
|
||||
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
|
||||
// if all alleles of vc1 are a contained in alleles of vc2, return true
|
||||
if (!vc1.getReference().equals(vc2.getReference()))
|
||||
return false;
|
||||
|
||||
for (Allele a :vc1.getAlternateAlleles()) {
|
||||
if (!vc2.getAlternateAlleles().contains(a))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
|
||||
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
|
||||
for ( VariantContext vc : VCs ) {
|
||||
|
||||
// look at previous variant contexts of different type. If:
|
||||
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
|
||||
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
|
||||
// c) neither: do nothing, just add vc to its own list
|
||||
boolean addtoOwnList = true;
|
||||
for (VariantContext.Type type : VariantContext.Type.values()) {
|
||||
if (type.equals(vc.getType()))
|
||||
continue;
|
||||
|
||||
if (!mappedVCs.containsKey(type))
|
||||
continue;
|
||||
|
||||
List<VariantContext> vcList = mappedVCs.get(type);
|
||||
for (int k=0; k < vcList.size(); k++) {
|
||||
VariantContext otherVC = vcList.get(k);
|
||||
if (allelesAreSubset(otherVC,vc)) {
|
||||
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
|
||||
vcList.remove(k);
|
||||
// avoid having empty lists
|
||||
if (vcList.size() == 0)
|
||||
mappedVCs.remove(type);
|
||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vc.getType()).add(otherVC);
|
||||
break;
|
||||
}
|
||||
else if (allelesAreSubset(vc,otherVC)) {
|
||||
// vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
|
||||
mappedVCs.get(type).add(vc);
|
||||
addtoOwnList = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (addtoOwnList) {
|
||||
if ( !mappedVCs.containsKey(vc.getType()) )
|
||||
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
|
||||
mappedVCs.get(vc.getType()).add(vc);
|
||||
}
|
||||
}
|
||||
|
||||
return mappedVCs;
|
||||
}
|
||||
|
||||
// TODO: remove that after testing
|
||||
// static private void verifyUniqueSampleNames(Collection<VariantContext> unsortedVCs) {
|
||||
// Set<String> names = new HashSet<String>();
|
||||
|
|
@ -431,23 +331,6 @@ public class VariantContextUtils {
|
|||
// }
|
||||
|
||||
|
||||
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
|
||||
if ( allowedAttributes == null )
|
||||
return vc;
|
||||
|
||||
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype genotype : vc.getGenotypes() ) {
|
||||
Map<String, Object> attrs = new HashMap<String, Object>();
|
||||
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
|
||||
if ( allowedAttributes.contains(attr.getKey()) )
|
||||
attrs.put(attr.getKey(), attr.getValue());
|
||||
}
|
||||
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
|
||||
}
|
||||
|
||||
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
|
||||
}
|
||||
|
||||
public static int getSize( VariantContext vc ) {
|
||||
return vc.getEnd() - vc.getStart() + 1;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue