From c3c66e853cc3a221f2b851e35505960f679ad1b8 Mon Sep 17 00:00:00 2001 From: depristo Date: Fri, 9 Jul 2010 20:18:37 +0000 Subject: [PATCH] Improvements for Jason git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3756 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/VariantContextUtils.java | 30 +++++++++++++++++-- .../walkers/variantutils/CombineVariants.java | 19 ++++++++---- 2 files changed, 40 insertions(+), 9 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 05182bade..c458e093b 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -161,7 +161,7 @@ public class VariantContextUtils { } public enum GenotypeMergeType { - UNIQUIFY, PRIORITIZE, UNSORTED + UNIQUIFY, PRIORITIZE, UNSORTED, REQUIRE_UNIQUE } public enum VariantMergeType { @@ -191,7 +191,10 @@ public class VariantContextUtils { return null; if ( annotateOrigin && priorityListOfVCs == null ) - throw new IllegalArgumentException("Cannot merge calls and annotate their origins with a complete priority list of VariantContexts"); + throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts"); + + if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE ) + verifyUniqueSampleNames(unsortedVCs); List VCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions); @@ -204,7 +207,7 @@ public class VariantContextUtils { Map genotypes = new TreeMap(); double negLog10PError = -1; Set filters = new TreeSet(); - Map attributes = new TreeMap(first.getAttributes()); + Map attributes = new TreeMap(); String rsID = null; int depth = 0; @@ -239,6 +242,13 @@ public class VariantContextUtils { depth += Integer.valueOf(vc.getAttributeAsString(VCFRecord.DEPTH_KEY)); if ( rsID == null && vc.hasAttribute("ID") ) rsID = vc.getAttributeAsString("ID"); + + for ( Map.Entry p : vc.getAttributes().entrySet() ) { + if ( ! attributes.containsKey(p.getKey()) || attributes.get(p.getKey()).equals(".") ) { // no value + //if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(), p.getValue()); + attributes.put(p.getKey(), p.getValue()); + } + } } // if at least one record was unfiltered and we want a union, clear all of the filters @@ -292,6 +302,20 @@ public class VariantContextUtils { } } + static private void verifyUniqueSampleNames(Collection unsortedVCs) { + Set names = new HashSet(); + for ( VariantContext vc : unsortedVCs ) { + for ( String name : vc.getSampleNames() ) { + //System.out.printf("Checking %s %b%n", name, names.contains(name)); + if ( names.contains(name) ) + throw new StingException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered " + name); + } + + names.addAll(vc.getSampleNames()); + } + } + + static private Allele determineReferenceAllele(List VCs) { Allele ref = null; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 5252e8181..897c0f2fb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -55,7 +55,7 @@ public class CombineVariants extends RodWalker { @Argument(shortName="variantMergeOptions", doc="How should we merge variant records across RODs? Union leaves the record if any record is unfiltered, Intersection requires all records to be unfiltered", required=false) public VariantContextUtils.VariantMergeType variantMergeOption = VariantContextUtils.VariantMergeType.UNION; - @Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true) + @Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=false) public String PRIORITY_STRING = null; @Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false) @@ -66,12 +66,10 @@ public class CombineVariants extends RodWalker { public void initialize() { vcfWriter = new VCFWriter(out, true); - priority = new ArrayList(Arrays.asList(PRIORITY_STRING.split(","))); - + validateAnnotateUnionArguments(); // todo -- need to merge headers in an intelligent way - validateAnnotateUnionArguments(priority); Map vcfRods = SampleUtils.getRodsWithVCFHeader(getToolkit(), null); Set samples = getSampleList(vcfRods, genotypeMergeOption); @@ -143,9 +141,18 @@ public class CombineVariants extends RodWalker { } - private void validateAnnotateUnionArguments(List priority) { + private void validateAnnotateUnionArguments() { Set rodNames = SampleUtils.getRodsNamesWithVCFHeader(getToolkit(), null); - if ( priority == null || rodNames.size() != priority.size() ) + + if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null ) + throw new StingException("Priority string must be provided if you want to prioritize genotypes"); + + if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE ) + priority = new ArrayList(Arrays.asList(PRIORITY_STRING.split(","))); + else + priority = new ArrayList(rodNames); + + if ( rodNames.size() != priority.size() ) throw new StingException("The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority); if ( ! rodNames.containsAll(rodNames) )