Improvements for Jason

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3756 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-07-09 20:18:37 +00:00
parent 405be230d0
commit c3c66e853c
2 changed files with 40 additions and 9 deletions

View File

@ -161,7 +161,7 @@ public class VariantContextUtils {
}
public enum GenotypeMergeType {
UNIQUIFY, PRIORITIZE, UNSORTED
UNIQUIFY, PRIORITIZE, UNSORTED, REQUIRE_UNIQUE
}
public enum VariantMergeType {
@ -191,7 +191,10 @@ public class VariantContextUtils {
return null;
if ( annotateOrigin && priorityListOfVCs == null )
throw new IllegalArgumentException("Cannot merge calls and annotate their origins with a complete priority list of VariantContexts");
throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts");
if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
verifyUniqueSampleNames(unsortedVCs);
List<VariantContext> VCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
@ -204,7 +207,7 @@ public class VariantContextUtils {
Map<String, Genotype> genotypes = new TreeMap<String, Genotype>();
double negLog10PError = -1;
Set<String> filters = new TreeSet<String>();
Map<String, Object> attributes = new TreeMap<String, Object>(first.getAttributes());
Map<String, Object> attributes = new TreeMap<String, Object>();
String rsID = null;
int depth = 0;
@ -239,6 +242,13 @@ public class VariantContextUtils {
depth += Integer.valueOf(vc.getAttributeAsString(VCFRecord.DEPTH_KEY));
if ( rsID == null && vc.hasAttribute("ID") )
rsID = vc.getAttributeAsString("ID");
for ( Map.Entry<String, Object> p : vc.getAttributes().entrySet() ) {
if ( ! attributes.containsKey(p.getKey()) || attributes.get(p.getKey()).equals(".") ) { // no value
//if ( vc != first ) System.out.printf("Adding key %s => %s%n", p.getKey(), p.getValue());
attributes.put(p.getKey(), p.getValue());
}
}
}
// if at least one record was unfiltered and we want a union, clear all of the filters
@ -292,6 +302,20 @@ public class VariantContextUtils {
}
}
/**
 * Checks that no sample name is shared between the supplied VariantContexts.
 *
 * The sample names of each context are compared against the names gathered from
 * the contexts visited before it; a context's own names are recorded only after
 * every one of them has been checked.
 *
 * @param unsortedVCs the VariantContexts whose sample names are to be checked
 * @throws StingException if a sample name was already seen in an earlier context
 */
static private void verifyUniqueSampleNames(Collection<VariantContext> unsortedVCs) {
    final Set<String> seenNames = new HashSet<String>();

    for ( final VariantContext context : unsortedVCs ) {
        for ( final String sample : context.getSampleNames() ) {
            if ( ! seenNames.contains(sample) ) {
                continue;
            }
            throw new StingException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered " + sample);
        }

        // Record this context's names only once all of them have passed the check above.
        seenNames.addAll(context.getSampleNames());
    }
}
static private Allele determineReferenceAllele(List<VariantContext> VCs) {
Allele ref = null;

View File

@ -55,7 +55,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
@Argument(shortName="variantMergeOptions", doc="How should we merge variant records across RODs? Union leaves the record if any record is unfiltered, Intersection requires all records to be unfiltered", required=false)
public VariantContextUtils.VariantMergeType variantMergeOption = VariantContextUtils.VariantMergeType.UNION;
@Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true)
@Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=false)
public String PRIORITY_STRING = null;
@Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false)
@ -66,12 +66,10 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
public void initialize() {
vcfWriter = new VCFWriter(out, true);
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
validateAnnotateUnionArguments();
// todo -- need to merge headers in an intelligent way
validateAnnotateUnionArguments(priority);
Map<String, VCFHeader> vcfRods = SampleUtils.getRodsWithVCFHeader(getToolkit(), null);
Set<String> samples = getSampleList(vcfRods, genotypeMergeOption);
@ -143,9 +141,18 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
}
private void validateAnnotateUnionArguments(List<String> priority) {
private void validateAnnotateUnionArguments() {
Set<String> rodNames = SampleUtils.getRodsNamesWithVCFHeader(getToolkit(), null);
if ( priority == null || rodNames.size() != priority.size() )
if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
throw new StingException("Priority string must be provided if you want to prioritize genotypes");
if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE )
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
else
priority = new ArrayList<String>(rodNames);
if ( rodNames.size() != priority.size() )
throw new StingException("The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
if ( ! rodNames.containsAll(rodNames) )