From efbfdb64fed48c27bd052812ecec483124946945 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 19 Aug 2013 23:52:23 -0400 Subject: [PATCH] Qscript to Downsample and analyze an exome BAM this script downsamples an exome BAM several times and makes a coverage distribution analysis (of bases that pass filters) as well as haplotype caller calls with a NA12878 Knowledge Base assessment with comparison against multi-sample calling with the UG. This script was used for the "downsampling the exome" presentation --- .../sting/queue/util/QScriptUtils.scala | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala index 7b0e5d1be..b078bcd4f 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala @@ -79,8 +79,23 @@ object QScriptUtils { if (sample.isEmpty) sample = r.getSample else if (sample != r.getSample) - return true; + return true } false } + + /** + * Returns all distinct samples named by the read groups in the header of the BAM file; this method consults only the header's read groups. NOTE(review): the reader opened here is never closed in this method - confirm whether it should be. + * + * @param bam the BAM file whose header read groups are inspected + * @return a set with all distinct samples (in no particular order); empty when the header lists no read groups + */ + def getSamplesFromBAM(bam: File) : Set[String] = { + val reader = new SAMFileReader(bam) + var samples: Set[String] = Set() + for (rg <- reader.getFileHeader.getReadGroups) { + samples += rg.getSample + } + samples + } }