Update CombineVariants: merge options are now split into separate variant-merge and genotype-merge options.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3746 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-07-08 20:09:48 +00:00
parent bd2ba3eb37
commit 760aaeda88
3 changed files with 30 additions and 34 deletions

View File

@ -160,12 +160,16 @@ public class VariantContextUtils {
return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount());
}
public enum MergeType {
UNION_VARIANTS, INTERSECT_VARIANTS, UNIQUIFY_GENOTYPES, PRIORITIZE_GENOTYPES, UNSORTED_GENOTYPES
/**
 * Selects how genotype records are handled when VariantContexts are merged.
 *
 * UNIQUIFY   - simpleMerge and getSampleList pass a "uniquify" flag of true to
 *              mergeGenotypes / mergedSampleName (presumably producing per-source
 *              unique sample names -- confirm against mergedSampleName).
 * PRIORITIZE - sortVariantContextsByPriority requires a non-null priority list and
 *              throws IllegalArgumentException when it is missing.
 * UNSORTED   - sortVariantContextsByPriority returns a plain copy of the input
 *              collection without applying any priority ordering.
 */
public enum GenotypeMergeType {
UNIQUIFY, PRIORITIZE, UNSORTED
}
/**
 * Selects how variant-level records (and their filters) are combined by simpleMerge.
 *
 * UNION     - when at least one of the merged records was unfiltered, simpleMerge
 *             clears all filters on the merged record.
 * INTERSECT - filters are not cleared by that rule; per the CombineVariants argument
 *             documentation this requires all records to be unfiltered
 *             (NOTE(review): confirm against the full simpleMerge body).
 */
public enum VariantMergeType {
UNION, INTERSECT
}
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs) {
return simpleMerge(unsortedVCs, null, EnumSet.of(MergeType.INTERSECT_VARIANTS, MergeType.UNSORTED_GENOTYPES), false, false);
return simpleMerge(unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false);
}
@ -176,17 +180,20 @@ public class VariantContextUtils {
*
* @param unsortedVCs
* @param priorityListOfVCs
* @param mergeOptions
* @param variantMergeOptions
* @param genotypeMergeOptions
* @return
*/
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs, EnumSet<MergeType> mergeOptions, boolean annotateOrigin, boolean printMessages ) {
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
boolean annotateOrigin, boolean printMessages ) {
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
return null;
if ( annotateOrigin && priorityListOfVCs == null )
throw new IllegalArgumentException("Cannot merge calls and annotate their origins with a complete priority list of VariantContexts");
List<VariantContext> VCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, mergeOptions);
List<VariantContext> VCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
// establish the baseline info from the first VC
VariantContext first = VCs.get(0);
@ -223,7 +230,7 @@ public class VariantContextUtils {
alleles.addAll(alleleMapping.values());
mergeGenotypes(genotypes, vc, alleleMapping, mergeOptions.contains(MergeType.UNIQUIFY_GENOTYPES));
mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);
negLog10PError = Math.max(negLog10PError, vc.isVariant() ? vc.getNegLog10PError() : -1);
@ -235,7 +242,7 @@ public class VariantContextUtils {
}
// if at least one record was unfiltered and we want a union, clear all of the filters
if ( mergeOptions.contains(MergeType.UNION_VARIANTS) && nFiltered != VCs.size() )
if ( variantMergeOptions == VariantMergeType.UNION && nFiltered != VCs.size() )
filters.clear();
// we care about where the call came from
@ -362,11 +369,11 @@ public class VariantContextUtils {
}
}
public static List<VariantContext> sortVariantContextsByPriority(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs, EnumSet<MergeType> mergeOptions ) {
if ( mergeOptions.contains(MergeType.PRIORITIZE_GENOTYPES) && priorityListOfVCs == null )
public static List<VariantContext> sortVariantContextsByPriority(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs, GenotypeMergeType mergeOption ) {
if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null )
throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");
if ( priorityListOfVCs == null || mergeOptions.contains(MergeType.UNSORTED_GENOTYPES) )
if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED )
return new ArrayList<VariantContext>(unsortedVCs);
else {
ArrayList<VariantContext> sorted = new ArrayList<VariantContext>(unsortedVCs);

View File

@ -49,28 +49,22 @@ import java.util.*;
@Requires(value={})
public class CombineVariants extends RodWalker<Integer, Integer> {
// the types of combinations we currently allow
public enum ComboType { UNION, MERGE }
@Argument(fullName="combination_type", shortName="type", doc="combination type; MERGE are supported", required=true)
protected ComboType COMBO_TYPE;
@Argument(shortName="genotypeMergeOptions", doc="How should we merge genotype records for samples shared across the ROD files?", required=false)
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE;
@Argument(shortName="variantMergeOptions", doc="How should we merge variant records across RODs? Union leaves the record if any record is unfiltered, Intersection requires all records to be unfiltered", required=false)
public VariantContextUtils.VariantMergeType variantMergeOption = VariantContextUtils.VariantMergeType.UNION;
@Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true)
protected String PRIORITY_STRING = null;
public String PRIORITY_STRING = null;
@Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false)
protected boolean printComplexMerges = false;
public boolean printComplexMerges = false;
private VCFWriter vcfWriter = null;
private List<String> priority = null;
protected EnumSet<VariantContextUtils.MergeType> mergeOptions;
protected final static EnumSet<VariantContextUtils.MergeType> mergeTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES);
protected final static EnumSet<VariantContextUtils.MergeType> unionTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.PRIORITIZE_GENOTYPES);
public void initialize() {
//Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
//hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
vcfWriter = new VCFWriter(out, true);
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
@ -78,9 +72,8 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
// todo -- need to merge headers in an intelligent way
validateAnnotateUnionArguments(priority);
mergeOptions = COMBO_TYPE == ComboType.MERGE ? mergeTypeOptions : unionTypeOptions;
Map<String, VCFHeader> vcfRods = SampleUtils.getRodsWithVCFHeader(getToolkit(), null);
Set<String> samples = getSampleList(vcfRods, mergeOptions);
Set<String> samples = getSampleList(vcfRods, genotypeMergeOption);
Set<VCFHeaderLine> headerLines = smartMergeHeaders(vcfRods.values());
headerLines.add(new VCFHeaderLine("source", "CombineVariants"));
@ -89,12 +82,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
}
// todo -- Eric, where's a better place to put this?
public static Set<String> getSampleList(Map<String, VCFHeader> headers, EnumSet<VariantContextUtils.MergeType> mergeOptions ) {
public static Set<String> getSampleList(Map<String, VCFHeader> headers, VariantContextUtils.GenotypeMergeType mergeOption ) {
Set<String> samples = new TreeSet<String>();
for ( Map.Entry<String, VCFHeader> val : headers.entrySet() ) {
VCFHeader header = val.getValue();
for ( String sample : header.getGenotypeSamples() ) {
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOptions.contains(VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES)));
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == VariantContextUtils.GenotypeMergeType.UNIQUIFY));
}
}
@ -164,12 +157,9 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
// get all of the vcf rods at this locus
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, mergeOptions, true, printComplexMerges);
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, variantMergeOption, genotypeMergeOption, true, printComplexMerges);
if ( mergedVC != null ) // only operate at the start of events
//if ( ! mergedVC.isMixed() ) // todo remove restriction when VCF4 writer is fixed
vcfWriter.add(mergedVC, ref.getBases());
// else
// logger.info(String.format("Ignoring complex event: " + mergedVC));
return vcs.isEmpty() ? 0 : 1;
}

View File

@ -344,7 +344,6 @@ public class VCFWriter {
if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) )
vcfGenotypeAttributeKeys.add(key);
}
}
String genotypeFormatString = Utils.join(GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys);
@ -426,7 +425,7 @@ public class VCFWriter {
mGenotypeRecords.clear();
mGenotypeRecords.addAll(genotypeObjects);
// info fields
Map<String, String> infoFields = new HashMap<String, String>();
Map<String, String> infoFields = new TreeMap<String, String>();
for ( Map.Entry<String, Object> elt : vc.getAttributes().entrySet() ) {
String key = elt.getKey();
if ( key.equals("ID") )