New mode for CombineVariants to assume the incoming VCFs have the same samples and disjoint calls. Drastically reduces the runtime for routine combining operations. Very useful with Queue.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5356 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2011-03-02 15:52:17 +00:00
parent 5e4b321f86
commit 8e1aa6059a
1 changed files with 12 additions and 0 deletions

View File

@ -79,6 +79,9 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
// Name of the INFO key used to record which input set a combined record came from; defaults to "set".
@Argument(fullName="setKey", shortName="setKey", doc="Key, by default set, in the INFO key=value tag emitted describing which set the combined VCF record came from. Set to null if you don't want the set field emitted.", required=false)
public String SET_KEY = "set";
// Opt-in fast path, off by default. When enabled, the walker skips the merge logic and passes
// input records straight to the VCF writer, on the assumption stated in the argument doc
// (identical sample sets, disjoint calls).
@Argument(fullName="assumeIdenticalSamples", shortName="assumeIdenticalSamples", doc="If true, assume input VCFs have identical sample sets and disjoint calls so that one can simply perform a merge sort to combine the VCFs into one, drastically reducing the runtime.", required=false)
public boolean ASSUME_IDENTICAL_SAMPLES = false;
// NOTE(review): starts out null and is assigned outside this hunk; presumably the input
// priority ordering used when merging -- confirm against initialize().
private List<String> priority = null;
public void initialize() {
@ -127,6 +130,15 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
// Need to provide reference bases to simpleMerge starting at current locus
Collection<VariantContext> vcs = tracker.getAllVariantContexts(ref, context.getLocation());
// Fast path: with identical samples and disjoint calls there is nothing to merge,
// so records are passed straight through to the writer.
if ( ASSUME_IDENTICAL_SAMPLES ) {
    // Iterate instead of calling vcs.iterator().next(): next() throws
    // NoSuchElementException on an empty collection, and taking only the first
    // element would silently drop any further records found at this locus.
    for ( final VariantContext vc : vcs ) {
        if ( vc != null ) {
            vcfWriter.add( vc, ref.getBase() );
        }
    }
    // Report whether this locus produced any output: 1 if so, 0 otherwise.
    return vcs.isEmpty() ? 0 : 1;
}
VariantContext mergedVC = null;
if ( variantMergeOption == VariantContextUtils.VariantMergeType.MASTER ) {
mergedVC = VariantContextUtils.masterMerge(vcs, "master");