From 1dfaacfeb5f1e7b31b10be94605f15622d2a4cbc Mon Sep 17 00:00:00 2001 From: Menachem Fromer Date: Thu, 22 Mar 2012 12:40:15 -0400 Subject: [PATCH] Check for consistency of the BAM and VCF sample names, with a command-line option to disable the exception (warn instead) if you know what you are doing --- .../phasing/ReadBackedPhasingWalker.java | 23 +++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java index 9470ce2f4..734ade376 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java @@ -36,8 +36,10 @@ import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.*; @@ -121,8 +123,11 @@ public class ReadBackedPhasingWalker extends RodWalker samplesToPhase = null; + protected Set samplesToPhase = null; + + @Hidden + @Argument(fullName = "permitNoSampleOverlap", shortName = "permitNoSampleOverlap", doc = "Don't exit (just WARN) when the VCF and BAMs do not overlap in samples", required = false) + private boolean permitNoSampleOverlap = false; private GenomeLoc mostDownstreamLocusReached = null; @@ -205,8 +210,18 @@ public class ReadBackedPhasingWalker extends RodWalker rodNameToHeader = 
getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName)); - Set samples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase); - writer.writeHeader(new VCFHeader(hInfo, samples)); + Set vcfSamples = new TreeSet(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase); + writer.writeHeader(new VCFHeader(hInfo, vcfSamples)); + + Set readSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); + readSamples.retainAll(vcfSamples); + if (readSamples.isEmpty()) { + String noPhaseString = "No common samples in VCF and BAM headers" + (samplesToPhase == null ? "" : " (limited to sampleToPhase parameters)") + ", so nothing could possibly be phased!"; + if (permitNoSampleOverlap) + logger.warn(noPhaseString); + else + throw new UserException(noPhaseString); + } } public boolean generateExtendedEvents() {