Update to CombineVariants. Now splits merge options into variant and genotype options separately.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3746 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bd2ba3eb37
commit
760aaeda88
|
|
@ -160,12 +160,16 @@ public class VariantContextUtils {
|
|||
return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount());
|
||||
}
|
||||
|
||||
public enum MergeType {
|
||||
UNION_VARIANTS, INTERSECT_VARIANTS, UNIQUIFY_GENOTYPES, PRIORITIZE_GENOTYPES, UNSORTED_GENOTYPES
|
||||
/**
 * Selects how genotype records are handled when VariantContexts are merged.
 *
 * UNIQUIFY   - simpleMerge passes {@code true} as the uniquify flag to mergeGenotypes, and
 *              CombineVariants.getSampleList passes it to mergedSampleName
 *              (presumably this makes sample names unique per source -- confirm).
 * PRIORITIZE - sortVariantContextsByPriority requires a non-null priority list and throws
 *              IllegalArgumentException otherwise.
 * UNSORTED   - sortVariantContextsByPriority returns a copy of the input without
 *              applying the priority ordering.
 */
public enum GenotypeMergeType {
|
||||
UNIQUIFY, PRIORITIZE, UNSORTED
|
||||
}
|
||||
|
||||
/**
 * Selects how variant records (as opposed to genotypes) are combined by simpleMerge.
 *
 * UNION     - if at least one merged record was unfiltered, simpleMerge clears all filters
 *             on the merged record.
 * INTERSECT - no filter clearing is applied; per the CombineVariants argument documentation,
 *             intersection requires all records to be unfiltered.
 */
public enum VariantMergeType {
|
||||
UNION, INTERSECT
|
||||
}
|
||||
|
||||
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs) {
|
||||
return simpleMerge(unsortedVCs, null, EnumSet.of(MergeType.INTERSECT_VARIANTS, MergeType.UNSORTED_GENOTYPES), false, false);
|
||||
return simpleMerge(unsortedVCs, null, VariantMergeType.INTERSECT, GenotypeMergeType.UNSORTED, false, false);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -176,17 +180,20 @@ public class VariantContextUtils {
|
|||
*
|
||||
* @param unsortedVCs the VariantContexts to merge; if null or empty, null is returned
|
||||
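* @param priorityListOfVCs priority ordering used to sort the VariantContexts; must be non-null when annotateOrigin is true or when genotypes are merged by priority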
|
||||
* @param mergeOptions
|
||||
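* @param variantMergeOptions how variant records are combined; with UNION, filters are cleared when at least one record is unfiltered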
|
||||
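* @param genotypeMergeOptions how genotypes are merged; controls the ordering of the VariantContexts and whether the uniquify flag is passed to mergeGenotypes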
|
||||
* @return the merged VariantContext, or null if unsortedVCs is null or empty
|
||||
*/
|
||||
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs, EnumSet<MergeType> mergeOptions, boolean annotateOrigin, boolean printMessages ) {
|
||||
public static VariantContext simpleMerge(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs,
|
||||
VariantMergeType variantMergeOptions, GenotypeMergeType genotypeMergeOptions,
|
||||
boolean annotateOrigin, boolean printMessages ) {
|
||||
if ( unsortedVCs == null || unsortedVCs.size() == 0 )
|
||||
return null;
|
||||
|
||||
if ( annotateOrigin && priorityListOfVCs == null )
|
||||
throw new IllegalArgumentException("Cannot merge calls and annotate their origins with a complete priority list of VariantContexts");
|
||||
|
||||
List<VariantContext> VCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, mergeOptions);
|
||||
List<VariantContext> VCs = sortVariantContextsByPriority(unsortedVCs, priorityListOfVCs, genotypeMergeOptions);
|
||||
|
||||
// establish the baseline info from the first VC
|
||||
VariantContext first = VCs.get(0);
|
||||
|
|
@ -223,7 +230,7 @@ public class VariantContextUtils {
|
|||
|
||||
alleles.addAll(alleleMapping.values());
|
||||
|
||||
mergeGenotypes(genotypes, vc, alleleMapping, mergeOptions.contains(MergeType.UNIQUIFY_GENOTYPES));
|
||||
mergeGenotypes(genotypes, vc, alleleMapping, genotypeMergeOptions == GenotypeMergeType.UNIQUIFY);
|
||||
|
||||
negLog10PError = Math.max(negLog10PError, vc.isVariant() ? vc.getNegLog10PError() : -1);
|
||||
|
||||
|
|
@ -235,7 +242,7 @@ public class VariantContextUtils {
|
|||
}
|
||||
|
||||
// if at least one record was unfiltered and we want a union, clear all of the filters
|
||||
if ( mergeOptions.contains(MergeType.UNION_VARIANTS) && nFiltered != VCs.size() )
|
||||
if ( variantMergeOptions == VariantMergeType.UNION && nFiltered != VCs.size() )
|
||||
filters.clear();
|
||||
|
||||
// we care about where the call came from
|
||||
|
|
@ -362,11 +369,11 @@ public class VariantContextUtils {
|
|||
}
|
||||
}
|
||||
|
||||
public static List<VariantContext> sortVariantContextsByPriority(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs, EnumSet<MergeType> mergeOptions ) {
|
||||
if ( mergeOptions.contains(MergeType.PRIORITIZE_GENOTYPES) && priorityListOfVCs == null )
|
||||
public static List<VariantContext> sortVariantContextsByPriority(Collection<VariantContext> unsortedVCs, List<String> priorityListOfVCs, GenotypeMergeType mergeOption ) {
|
||||
if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null )
|
||||
throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");
|
||||
|
||||
if ( priorityListOfVCs == null || mergeOptions.contains(MergeType.UNSORTED_GENOTYPES) )
|
||||
if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED )
|
||||
return new ArrayList<VariantContext>(unsortedVCs);
|
||||
else {
|
||||
ArrayList<VariantContext> sorted = new ArrayList<VariantContext>(unsortedVCs);
|
||||
|
|
|
|||
|
|
@ -49,28 +49,22 @@ import java.util.*;
|
|||
@Requires(value={})
|
||||
public class CombineVariants extends RodWalker<Integer, Integer> {
|
||||
// the types of combinations we currently allow
|
||||
public enum ComboType { UNION, MERGE }
|
||||
@Argument(fullName="combination_type", shortName="type", doc="combination type; MERGE are supported", required=true)
|
||||
protected ComboType COMBO_TYPE;
|
||||
@Argument(shortName="genotypeMergeOptions", doc="How should we merge genotype records for samples shared across the ROD files?", required=false)
|
||||
public VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE;
|
||||
|
||||
@Argument(shortName="variantMergeOptions", doc="How should we merge variant records across RODs? Union leaves the record if any record is unfiltered, Intersection requires all records to be unfiltered", required=false)
|
||||
public VariantContextUtils.VariantMergeType variantMergeOption = VariantContextUtils.VariantMergeType.UNION;
|
||||
|
||||
@Argument(fullName="rod_priority_list", shortName="priority", doc="When taking the union of variants containing genotypes: a comma-separated string describing the priority ordering for the genotypes as far as which record gets emitted; a complete priority list MUST be provided", required=true)
|
||||
protected String PRIORITY_STRING = null;
|
||||
public String PRIORITY_STRING = null;
|
||||
|
||||
@Argument(fullName="printComplexMerges", shortName="printComplexMerges", doc="Print out interesting sites requiring complex compatibility merging", required=false)
|
||||
protected boolean printComplexMerges = false;
|
||||
public boolean printComplexMerges = false;
|
||||
|
||||
private VCFWriter vcfWriter = null;
|
||||
private List<String> priority = null;
|
||||
protected EnumSet<VariantContextUtils.MergeType> mergeOptions;
|
||||
|
||||
protected final static EnumSet<VariantContextUtils.MergeType> mergeTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES);
|
||||
protected final static EnumSet<VariantContextUtils.MergeType> unionTypeOptions = EnumSet.of(VariantContextUtils.MergeType.UNION_VARIANTS, VariantContextUtils.MergeType.PRIORITIZE_GENOTYPES);
|
||||
|
||||
public void initialize() {
|
||||
|
||||
//Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
//hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
|
||||
vcfWriter = new VCFWriter(out, true);
|
||||
priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
|
||||
|
||||
|
|
@ -78,9 +72,8 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
// todo -- need to merge headers in an intelligent way
|
||||
|
||||
validateAnnotateUnionArguments(priority);
|
||||
mergeOptions = COMBO_TYPE == ComboType.MERGE ? mergeTypeOptions : unionTypeOptions;
|
||||
Map<String, VCFHeader> vcfRods = SampleUtils.getRodsWithVCFHeader(getToolkit(), null);
|
||||
Set<String> samples = getSampleList(vcfRods, mergeOptions);
|
||||
Set<String> samples = getSampleList(vcfRods, genotypeMergeOption);
|
||||
|
||||
Set<VCFHeaderLine> headerLines = smartMergeHeaders(vcfRods.values());
|
||||
headerLines.add(new VCFHeaderLine("source", "CombineVariants"));
|
||||
|
|
@ -89,12 +82,12 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
// todo -- Eric, where's a better place to put this?
|
||||
public static Set<String> getSampleList(Map<String, VCFHeader> headers, EnumSet<VariantContextUtils.MergeType> mergeOptions ) {
|
||||
public static Set<String> getSampleList(Map<String, VCFHeader> headers, VariantContextUtils.GenotypeMergeType mergeOption ) {
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
for ( Map.Entry<String, VCFHeader> val : headers.entrySet() ) {
|
||||
VCFHeader header = val.getValue();
|
||||
for ( String sample : header.getGenotypeSamples() ) {
|
||||
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOptions.contains(VariantContextUtils.MergeType.UNIQUIFY_GENOTYPES)));
|
||||
samples.add(VariantContextUtils.mergedSampleName(val.getKey(), sample, mergeOption == VariantContextUtils.GenotypeMergeType.UNIQUIFY));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -164,12 +157,9 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
// get all of the vcf rods at this locus
|
||||
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
|
||||
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, mergeOptions, true, printComplexMerges);
|
||||
VariantContext mergedVC = VariantContextUtils.simpleMerge(vcs, priority, variantMergeOption, genotypeMergeOption, true, printComplexMerges);
|
||||
if ( mergedVC != null ) // only operate at the start of events
|
||||
//if ( ! mergedVC.isMixed() ) // todo remove restriction when VCF4 writer is fixed
|
||||
vcfWriter.add(mergedVC, ref.getBases());
|
||||
// else
|
||||
// logger.info(String.format("Ignoring complex event: " + mergedVC));
|
||||
|
||||
return vcs.isEmpty() ? 0 : 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -344,7 +344,6 @@ public class VCFWriter {
|
|||
if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) )
|
||||
vcfGenotypeAttributeKeys.add(key);
|
||||
}
|
||||
|
||||
}
|
||||
String genotypeFormatString = Utils.join(GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys);
|
||||
|
||||
|
|
@ -426,7 +425,7 @@ public class VCFWriter {
|
|||
mGenotypeRecords.clear();
|
||||
mGenotypeRecords.addAll(genotypeObjects);
|
||||
// info fields
|
||||
Map<String, String> infoFields = new HashMap<String, String>();
|
||||
Map<String, String> infoFields = new TreeMap<String, String>();
|
||||
for ( Map.Entry<String, Object> elt : vc.getAttributes().entrySet() ) {
|
||||
String key = elt.getKey();
|
||||
if ( key.equals("ID") )
|
||||
|
|
|
|||
Loading…
Reference in New Issue