From 760aaeda88f6ae477252c07c982be08dc3544929 Mon Sep 17 00:00:00 2001 From: depristo Date: Thu, 8 Jul 2010 20:09:48 +0000 Subject: [PATCH] Update to CombineVariants. Now splits merge options into variant and genotype options separately. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3746 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/VariantContextUtils.java | 29 ++++++++++------- .../walkers/variantutils/CombineVariants.java | 32 +++++++------------ .../sting/utils/genotype/vcf/VCFWriter.java | 3 +- 3 files changed, 30 insertions(+), 34 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 6e04d2585..05182bade 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -160,12 +160,16 @@ public class VariantContextUtils { return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount()); } - public enum MergeType { - UNION_VARIANTS, INTERSECT_VARIANTS, UNIQUIFY_GENOTYPES, PRIORITIZE_GENOTYPES, UNSORTED_GENOTYPES + public enum GenotypeMergeType { + UNIQUIFY, PRIORITIZE, UNSORTED + } + + public enum VariantMergeType { + UNION, INTERSECT } public static VariantContext simpleMerge(Collection unsortedVCs) { - return simpleMerge(unsortedVCs, null, EnumSet.of(MergeType.INTERSECT_VARIANTS, MergeType.UNSORTED_GENOTYPES), false, false); + return simpleMerge(unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false); } @@ -176,17 +180,20 @@ public class VariantContextUtils { * * @param unsortedVCs * @param priorityListOfVCs - * @param mergeOptions + * @param variantMergeOptions + * @param genotypeMergeOptions * @return */ - public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, EnumSet mergeOptions, boolean annotateOrigin, boolean printMessages ) { + public static VariantContext simpleMerge(Collection unsortedVCs, List priorityListOfVCs, + VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions, + boolean annotateOrigin, boolean printMessages ) { if ( unsortedVCs == null || unsortedVCs.size() == 0 ) return null; if ( annotateOrigin && priorityListOfVCs == null ) throw new IllegalArgumentException("Cannot merge calls and annotate their origins with a complete priority list of VariantContexts"); - List VCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, mergeOptions); + List VCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); // establish the baseline info from the first VC VariantContext first = VCs.get(0); @@ -223,7 +230,7 @@ public class VariantContextUtils { alleles.addAll(alleleMapping.values()); - mergeGenotypes(genotypes, vc, alleleMapping, mergeOptions.contains(MergeType.UNIQUIFY_GENOTYPES)); + mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY); negLog10PError = Math.max(negLog10PError, vc.isVariant() ? vc.getNegLog10PError() : -1); @@ -235,7 +242,7 @@ public class VariantContextUtils { } // if at least one record was unfiltered and we want a union, clear all of the filters - if ( mergeOptions.contains(MergeType.UNION_VARIANTS) && nFiltered != VCs.size() ) + if ( variantMergeOptions == VariantMergeType.UNION && nFiltered != VCs.size() ) filters.clear(); // we care about where the call came from @@ -362,11 +369,11 @@ public class VariantContextUtils { } } - public static List sortVariantContextsByPriority(Collection unsortedVCs, List priorityListOfVCs, EnumSet mergeOptions ) { - if ( mergeOptions.contains(MergeType.PRIORITIZE_GENOTYPES) && priorityListOfVCs == null ) + public static List sortVariantContextsByPriority(Collection unsortedVCs, List priorityListOfVCs, GenotypeMergeType mergeOption ) { + if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null ) throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list"); - if ( priorityListOfVCs == null || mergeOptions.contains(MergeType.UNSORTED_GENOTYPES) ) + if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED ) return new ArrayList(unsortedVCs); else { ArrayList sorted = new ArrayList(unsortedVCs); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 32f63b55c..fb208b380 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -49,28 +49,22 @@ import java.util.*; @Requires(value={}) public class CombineVariants extends RodWalker { // the types of combinations we currently allow - public enum ComboType { UNION, MERGE } - @Argument(fullName="combination_type", shortName="type", doc="combination type; MERGE are supported", required=true) - protected ComboType COMBO_TYPE; + @Argument(shortName="genotypeMergeOptions", doc="How should we merge genotype records for samples shared across the ROD files?", required=false) + public VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE; + + @Argument(shortName="variantMergeOptions", doc="How should we merge variant records across RODs? Union leaves the record if any record is unfiltered, Intersection requires all records to be unfiltered", required=false) + public VariantContextUtils.VariantMergeType variantMergeOption = VariantContextUtils.VariantMergeType.UNION; @Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true) - protected String PRIORITY_STRING = null; + public String PRIORITY_STRING = null; @Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false) - protected boolean printComplexMerges = false; + public boolean printComplexMerges = false; private VCFWriter vcfWriter = null; private List priority = null; - protected EnumSet mergeOptions; - - protected final static EnumSet mergeTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES); - protected final static EnumSet unionTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.PRIORITIZE_GENOTYPES); public void initialize() { - - //Set hInfo = new HashSet(); - //hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); - vcfWriter = new VCFWriter(out, true); priority = new ArrayList(Arrays.asList(PRIORITY_STRING.split(","))); @@ -78,9 +72,8 @@ public class CombineVariants extends RodWalker { // todo -- need to merge headers in an intelligent way validateAnnotateUnionArguments(priority); - mergeOptions = COMBO_TYPE == ComboType.MERGE ? mergeTypeOptions : unionTypeOptions; Map vcfRods = SampleUtils.getRodsWithVCFHeader(getToolkit(), null); - Set samples = getSampleList(vcfRods, mergeOptions); + Set samples = getSampleList(vcfRods, genotypeMergeOption); Set headerLines = smartMergeHeaders(vcfRods.values()); headerLines.add(new VCFHeaderLine("source", "CombineVariants")); @@ -89,12 +82,12 @@ public class CombineVariants extends RodWalker { } // todo -- Eric, where's a better place to put this? - public static Set getSampleList(Map headers, EnumSet mergeOptions ) { + public static Set getSampleList(Map headers, VariantContextUtils.GenotypeMergeType mergeOption ) { Set samples = new TreeSet(); for ( Map.Entry val : headers.entrySet() ) { VCFHeader header = val.getValue(); for ( String sample : header.getGenotypeSamples() ) { - samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOptions.contains(VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES))); + samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == VariantContextUtils.GenotypeMergeType.UNIQUIFY)); } } @@ -164,12 +157,9 @@ public class CombineVariants extends RodWalker { // get all of the vcf rods at this locus Collection vcs = tracker.getAllVariantContexts(ref, context.getLocation()); - VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, mergeOptions, true, printComplexMerges); + VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, variantMergeOption, genotypeMergeOption, true, printComplexMerges); if ( mergedVC != null ) // only operate at the start of events - //if ( ! mergedVC.isMixed() ) // todo remove restriction when VCF4 writer is fixed vcfWriter.add(mergedVC, ref.getBases()); -// else -// logger.info(String.format("Ignoring complex event: " + mergedVC)); return vcs.isEmpty() ? 0 : 1; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index f815608a4..751386ed2 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -344,7 +344,6 @@ public class VCFWriter { if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) ) vcfGenotypeAttributeKeys.add(key); } - } String genotypeFormatString = Utils.join(GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys); @@ -426,7 +425,7 @@ public class VCFWriter { mGenotypeRecords.clear(); mGenotypeRecords.addAll(genotypeObjects); // info fields - Map infoFields = new HashMap(); + Map infoFields = new TreeMap(); for ( Map.Entry elt : vc.getAttributes().entrySet() ) { String key = elt.getKey(); if ( key.equals("ID") )