# Patch summary (free text before the first "diff --git" header; everything from that header on is unchanged).
#
# CombineVariants.java (code following validateAnnotateUnionArguments()):
#   * SampleUtils.verifyUniqueSamplesNames(vcfRods) is now evaluated once into the local
#     sampleNamesAreUnique and reused by the REQUIRE_UNIQUE check instead of being called there.
#   * When genotypeMergeOption is null: duplicate sample names now raise a UserException that tells the
#     user to pass UNIQUIFY or UNSORTED explicitly; otherwise the option defaults to UNSORTED.
#     Before this patch the UNSORTED default was applied only when PRIORITY_STRING was also null,
#     and no duplicate-name check was made at this point.
#   * The "Priority string is not provided" log message is now emitted when PRIORITY_STRING is null and
#     genotypeMergeOption is PRIORITIZE; before, it sat in the block guarded by both values being null.
#
# CombineVariantsIntegrationTest.java:
#   * uniqueSNPs() now passes " -genotypeMergeOptions UNSORTED" to combine2; the expected md5 is unchanged.
#
# NOTE(review): this copy of the patch has lost its line breaks and apparently everything inside angle
#   brackets (see "RodWalker implements Tree" and "Collections.emptySet()") - presumably generic type
#   arguments; restore from the original commit before applying.
# NOTE(review): the logger.info call concatenates `priority`, which is not assigned anywhere in this hunk -
#   confirm it holds a meaningful value when PRIORITIZE is requested without a priority string.
# NOTE(review): the new duplicate-name failure throws UserException while the REQUIRE_UNIQUE failure still
#   throws IllegalStateException - confirm the difference is intended.
#
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java index e06798fd2..293e4b21a 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariantsIntegrationTest.java @@ -150,7 +150,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest { @Test public void uniqueSNPs() { // parallelism must be disabled because the input VCF is malformed (DB=0) and parallelism actually fixes this which breaks the md5s - combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "e5ea6ac3905bd9eeea1a2ef5d2cb5af7", true); + //both of these files have the YRI trio and merging of duplicate samples without priority must be specified with UNSORTED merge type + combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", " -genotypeMergeOptions UNSORTED", "e5ea6ac3905bd9eeea1a2ef5d2cb5af7", true); } @Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariants.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariants.java index beb453b7b..aa69693b8 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariants.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/variantutils/CombineVariants.java @@ -208,14 +208,25 @@ public 
class CombineVariants extends RodWalker implements Tree logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option"); validateAnnotateUnionArguments(); - if ( PRIORITY_STRING == null && genotypeMergeOption == null) { - genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED; + + final boolean sampleNamesAreUnique = SampleUtils.verifyUniqueSamplesNames(vcfRods); + + if (genotypeMergeOption == null) { + if (!sampleNamesAreUnique) + throw new UserException("Duplicate sample names were discovered but no genotypemergeoption was supplied. " + + "To combine samples without merging specify --genotypemergeoption UNIQUIFY. Merging duplicate samples " + + "without specified priority is unsupported, but can be achieved by specifying --genotypemergeoption UNSORTED."); + else + genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED; + } + + if ( PRIORITY_STRING == null && genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE) { //PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12) logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority); } if (genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE && - !SampleUtils.verifyUniqueSamplesNames(vcfRods)) + !sampleNamesAreUnique) throw new IllegalStateException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered."); samples = sitesOnlyVCF ? Collections.emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption);