Merge pull request #750 from broadinstitute/ldg_uniqueSamplesInCombineVariants

Throw UserException if input VCFs have duplicate samples but no genotypeMergeOption was supplied.
This commit is contained in:
ldgauthier 2014-10-16 07:57:27 -04:00
commit 5fa5724e4a
2 changed files with 16 additions and 4 deletions

View File

@ -150,7 +150,8 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
// Combines two SNP call sets via combine2 and checks the result against a fixed md5.
// NOTE(review): the trailing `true` argument presumably disables parallelism — confirm against combine2.
@Test public void uniqueSNPs() {
// parallelism must be disabled because the input VCF is malformed (DB=0); running in parallel actually fixes this, which breaks the md5s
combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "", "e5ea6ac3905bd9eeea1a2ef5d2cb5af7", true);
// both of these files contain the YRI trio; merging duplicate samples without a priority must be requested explicitly with the UNSORTED merge type
combine2("pilot2.snps.vcf4.genotypes.vcf", "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", " -genotypeMergeOptions UNSORTED", "e5ea6ac3905bd9eeea1a2ef5d2cb5af7", true);
}
// Runs combineSites with -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED; the second argument is presumably the expected output md5 — confirm against combineSites.
@Test public void omniHM3Union() { combineSites(" -filteredRecordsMergeType KEEP_IF_ANY_UNFILTERED", "def52bcd3942bbe39cd7ebe845c4f206"); }

View File

@ -208,14 +208,25 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");
validateAnnotateUnionArguments();
if ( PRIORITY_STRING == null && genotypeMergeOption == null) {
genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED;
final boolean sampleNamesAreUnique = SampleUtils.verifyUniqueSamplesNames(vcfRods);
if (genotypeMergeOption == null) {
if (!sampleNamesAreUnique)
throw new UserException("Duplicate sample names were discovered but no genotypemergeoption was supplied. " +
"To combine samples without merging specify --genotypemergeoption UNIQUIFY. Merging duplicate samples " +
"without specified priority is unsupported, but can be achieved by specifying --genotypemergeoption UNSORTED.");
else
genotypeMergeOption = GATKVariantContextUtils.GenotypeMergeType.UNSORTED;
}
if ( PRIORITY_STRING == null && genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE) {
//PRIORITY_STRING = Utils.join(",", vcfRods.keySet()); Deleted by Ami (7/10/12)
logger.info("Priority string is not provided, using arbitrary genotyping order: "+priority);
}
if (genotypeMergeOption == GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE &&
!SampleUtils.verifyUniqueSamplesNames(vcfRods))
!sampleNamesAreUnique)
throw new IllegalStateException("REQUIRE_UNIQUE sample names is true but duplicate names were discovered.");
samples = sitesOnlyVCF ? Collections.<String>emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption);