Move additional VariantContext utility methods back to the GATK

Thanks to Eric for his feedback
This commit is contained in:
David Roazen 2013-01-30 13:46:59 -05:00
parent ff8ba03249
commit 591df2be44
7 changed files with 131 additions and 126 deletions

View File

@ -320,7 +320,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
// logger.debug("original VC = " + vc);
// logger.debug("sub VC = " + subvc);
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
return GATKVariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
}
private List<VariantContext> processQueue(PhasingStats phaseStats, boolean processAll) {

View File

@ -280,7 +280,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
List<VariantContext> mergedVCs = new ArrayList<VariantContext>();
if (multipleAllelesMergeType == GATKVariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
Map<VariantContext.Type, List<VariantContext>> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs);
Map<VariantContext.Type, List<VariantContext>> VCsByType = GATKVariantContextUtils.separateVariantContextsByType(vcs);
// TODO -- clean this up in a refactoring
// merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type)
@ -320,7 +320,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
// re-compute chromosome counts
VariantContextUtils.calculateChromosomeCounts(builder, false);
if ( minimalVCF )
VariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
GATKVariantContextUtils.pruneVariantContext(builder, Arrays.asList(SET_KEY));
vcfWriter.add(builder.make());
}

View File

@ -25,11 +25,9 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFHeader;
@ -277,8 +275,8 @@ public class ConcordanceMetrics {
if ( truth.isMonomorphicInSamples() )
return EVAL_ONLY;
boolean evalSubsetTruth = VariantContextUtils.allelesAreSubset(eval,truth);
boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truth,eval);
boolean evalSubsetTruth = GATKVariantContextUtils.allelesAreSubset(eval, truth);
boolean truthSubsetEval = GATKVariantContextUtils.allelesAreSubset(truth, eval);
if ( evalSubsetTruth && truthSubsetEval )
return ALLELES_MATCH;

View File

@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
@ -246,7 +247,7 @@ public class VariantsToVCF extends RodWalker<Integer, Integer> {
vcfwriter.writeHeader(new VCFHeader(hInfo, samples));
}
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
vc = GATKVariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
vcfwriter.add(vc);
}

View File

@ -50,6 +50,7 @@ public class GATKVariantContextUtils {
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_INTERSECTION = "Intersection";
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
@ -1013,6 +1014,124 @@ public class GATKVariantContextUtils {
}
}
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
for ( final String key : keysToPreserve ) {
if ( igc.hasAttribute(key) )
attributes.put(key, igc.getAttribute(key));
}
return attributes;
}
/**
* @deprecated use variant context builder version instead
* @param vc the variant context
* @param keysToPreserve the keys to preserve
* @return a pruned version of the original variant context
*/
@Deprecated
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
}
public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection<String> keysToPreserve ) {
final VariantContext vc = builder.make();
if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList();
// VC info
final Map<String, Object> attributes = subsetAttributes(vc.getCommonInfo(), keysToPreserve);
// Genotypes
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes() ) {
final GenotypeBuilder gb = new GenotypeBuilder(g);
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
gb.noAD().noDP().noPL().noAttributes();
genotypes.add(gb.make());
}
return builder.genotypes(genotypes).attributes(attributes);
}
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
// if all alleles of vc1 are a contained in alleles of vc2, return true
if (!vc1.getReference().equals(vc2.getReference()))
return false;
for (Allele a :vc1.getAlternateAlleles()) {
if (!vc2.getAlternateAlleles().contains(a))
return false;
}
return true;
}
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
for ( VariantContext vc : VCs ) {
// look at previous variant contexts of different type. If:
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
// c) neither: do nothing, just add vc to its own list
boolean addtoOwnList = true;
for (VariantContext.Type type : VariantContext.Type.values()) {
if (type.equals(vc.getType()))
continue;
if (!mappedVCs.containsKey(type))
continue;
List<VariantContext> vcList = mappedVCs.get(type);
for (int k=0; k < vcList.size(); k++) {
VariantContext otherVC = vcList.get(k);
if (allelesAreSubset(otherVC,vc)) {
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
vcList.remove(k);
// avoid having empty lists
if (vcList.size() == 0)
mappedVCs.remove(type);
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(otherVC);
break;
}
else if (allelesAreSubset(vc,otherVC)) {
// vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
mappedVCs.get(type).add(vc);
addtoOwnList = false;
break;
}
}
}
if (addtoOwnList) {
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(vc);
}
}
return mappedVCs;
}
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
if ( allowedAttributes == null )
return vc;
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype genotype : vc.getGenotypes() ) {
Map<String, Object> attrs = new HashMap<String, Object>();
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
if ( allowedAttributes.contains(attr.getKey()) )
attrs.put(attr.getKey(), attr.getValue());
}
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
}
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
}
private static class AlleleMapper {
private VariantContext vc = null;
private Map<Allele, Allele> map = null;

View File

@ -625,6 +625,10 @@ public class VariantContext implements Feature { // to enable tribble integratio
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
public CommonInfo getCommonInfo() {
return commonInfo;
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with alleles

View File

@ -316,106 +316,6 @@ public class VariantContextUtils {
return r;
}
private final static Map<String, Object> subsetAttributes(final CommonInfo igc, final Collection<String> keysToPreserve) {
Map<String, Object> attributes = new HashMap<String, Object>(keysToPreserve.size());
for ( final String key : keysToPreserve ) {
if ( igc.hasAttribute(key) )
attributes.put(key, igc.getAttribute(key));
}
return attributes;
}
/**
* @deprecated use variant context builder version instead
* @param vc the variant context
* @param keysToPreserve the keys to preserve
* @return a pruned version of the original variant context
*/
@Deprecated
public static VariantContext pruneVariantContext(final VariantContext vc, Collection<String> keysToPreserve ) {
return pruneVariantContext(new VariantContextBuilder(vc), keysToPreserve).make();
}
public static VariantContextBuilder pruneVariantContext(final VariantContextBuilder builder, Collection<String> keysToPreserve ) {
final VariantContext vc = builder.make();
if ( keysToPreserve == null ) keysToPreserve = Collections.emptyList();
// VC info
final Map<String, Object> attributes = subsetAttributes(vc.commonInfo, keysToPreserve);
// Genotypes
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype g : vc.getGenotypes() ) {
final GenotypeBuilder gb = new GenotypeBuilder(g);
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
gb.noAD().noDP().noPL().noAttributes();
genotypes.add(gb.make());
}
return builder.genotypes(genotypes).attributes(attributes);
}
public static boolean allelesAreSubset(VariantContext vc1, VariantContext vc2) {
// if all alleles of vc1 are a contained in alleles of vc2, return true
if (!vc1.getReference().equals(vc2.getReference()))
return false;
for (Allele a :vc1.getAlternateAlleles()) {
if (!vc2.getAlternateAlleles().contains(a))
return false;
}
return true;
}
public static Map<VariantContext.Type, List<VariantContext>> separateVariantContextsByType(Collection<VariantContext> VCs) {
HashMap<VariantContext.Type, List<VariantContext>> mappedVCs = new HashMap<VariantContext.Type, List<VariantContext>>();
for ( VariantContext vc : VCs ) {
// look at previous variant contexts of different type. If:
// a) otherVC has alleles which are subset of vc, remove otherVC from its list and add otherVC to vc's list
// b) vc has alleles which are subset of otherVC. Then, add vc to otherVC's type list (rather, do nothing since vc will be added automatically to its list)
// c) neither: do nothing, just add vc to its own list
boolean addtoOwnList = true;
for (VariantContext.Type type : VariantContext.Type.values()) {
if (type.equals(vc.getType()))
continue;
if (!mappedVCs.containsKey(type))
continue;
List<VariantContext> vcList = mappedVCs.get(type);
for (int k=0; k < vcList.size(); k++) {
VariantContext otherVC = vcList.get(k);
if (allelesAreSubset(otherVC,vc)) {
// otherVC has a type different than vc and its alleles are a subset of vc: remove otherVC from its list and add it to vc's type list
vcList.remove(k);
// avoid having empty lists
if (vcList.size() == 0)
mappedVCs.remove(type);
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(otherVC);
break;
}
else if (allelesAreSubset(vc,otherVC)) {
// vc has a type different than otherVC and its alleles are a subset of VC: add vc to otherVC's type list and don't add to its own
mappedVCs.get(type).add(vc);
addtoOwnList = false;
break;
}
}
}
if (addtoOwnList) {
if ( !mappedVCs.containsKey(vc.getType()) )
mappedVCs.put(vc.getType(), new ArrayList<VariantContext>());
mappedVCs.get(vc.getType()).add(vc);
}
}
return mappedVCs;
}
// TODO: remove that after testing
// static private void verifyUniqueSampleNames(Collection<VariantContext> unsortedVCs) {
// Set<String> names = new HashSet<String>();
@ -431,23 +331,6 @@ public class VariantContextUtils {
// }
public static VariantContext purgeUnallowedGenotypeAttributes(VariantContext vc, Set<String> allowedAttributes) {
if ( allowedAttributes == null )
return vc;
GenotypesContext newGenotypes = GenotypesContext.create(vc.getNSamples());
for ( final Genotype genotype : vc.getGenotypes() ) {
Map<String, Object> attrs = new HashMap<String, Object>();
for ( Map.Entry<String, Object> attr : genotype.getExtendedAttributes().entrySet() ) {
if ( allowedAttributes.contains(attr.getKey()) )
attrs.put(attr.getKey(), attr.getValue());
}
newGenotypes.add(new GenotypeBuilder(genotype).attributes(attrs).make());
}
return new VariantContextBuilder(vc).genotypes(newGenotypes).make();
}
public static int getSize( VariantContext vc ) {
return vc.getEnd() - vc.getStart() + 1;
}