Check for consistency of the BAM and VCF sample names, with a command line disable to throw if you know what you are doing

This commit is contained in:
Menachem Fromer 2012-03-22 12:40:15 -04:00
parent 8c09ff9459
commit 1dfaacfeb5
1 changed files with 19 additions and 4 deletions

View File

@ -36,8 +36,10 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -121,8 +123,11 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
public int MIN_MAPPING_QUALITY_SCORE = 20;
@Argument(fullName = "sampleToPhase", shortName = "sampleToPhase", doc = "Only include these samples when phasing", required = false)
protected Set
<String> samplesToPhase = null;
protected Set<String> samplesToPhase = null;
@Hidden
@Argument(fullName = "permitNoSampleOverlap", shortName = "permitNoSampleOverlap", doc = "Don't exit (just WARN) when the VCF and BAMs do not overlap in samples", required = false)
private boolean permitNoSampleOverlap = false;
private GenomeLoc mostDownstreamLocusReached = null;
@ -205,8 +210,18 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
// todo -- fix samplesToPhase
String trackName = variantCollection.variants.getName();
Map<String, VCFHeader> rodNameToHeader = getVCFHeadersFromRods(getToolkit(), Arrays.asList(trackName));
Set<String> samples = new TreeSet<String>(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase);
writer.writeHeader(new VCFHeader(hInfo, samples));
Set<String> vcfSamples = new TreeSet<String>(samplesToPhase == null ? rodNameToHeader.get(trackName).getGenotypeSamples() : samplesToPhase);
writer.writeHeader(new VCFHeader(hInfo, vcfSamples));
Set<String> readSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
readSamples.retainAll(vcfSamples);
if (readSamples.isEmpty()) {
String noPhaseString = "No common samples in VCF and BAM headers" + (samplesToPhase == null ? "" : " (limited to sampleToPhase parameters)") + ", so nothing could possibly be phased!";
if (permitNoSampleOverlap)
logger.warn(noPhaseString);
else
throw new UserException(noPhaseString);
}
}
public boolean generateExtendedEvents() {