Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

2011-07-16 09:12:13 -04:00 · 2011-07-16 09:12:13 -04:00 · 7524b00cbc
parent 6e8ec206c8 e106a5e734
commit 7524b00cbc
12 changed files with 204 additions and 90 deletions
--- a/build.xml
+++ b/build.xml
@ -780,6 +780,50 @@
        </sequential>
    </macrodef>

+    <!-- FAILED-TEST -->
+    <macrodef name="run-failed-test">
+        <attribute name="xmlfailedtestfile" />
+        <sequential>
+            <!-- Get the pipeline run type.  Default to dry.  -->
+            <condition property="pipeline.run" value="dry" else="${pipeline.run}">
+                <equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
+            </condition>
+
+            <condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
+                <isset property="include.contracts" />
+            </condition>
+
+            <mkdir dir="${report}/failed_rerun" />
+            <echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
+            <taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
+            <testng outputDir="${report}/failed_rerun"
+                    haltOnFailure="false" failureProperty="test.failure"
+                    verbose="2"
+                    workingDir="${basedir}"
+                    useDefaultListeners="false"
+                    listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
+                <jvmarg value="-Xmx${test.maxmemory}" />
+                <jvmarg value="-Djava.awt.headless=true" />
+                <jvmarg value="-Dpipeline.run=${pipeline.run}" />
+                <jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
+                <jvmarg line="${cofoja.jvm.args}"/>
+<!--                 <jvmarg value="-Xdebug"/> -->
+<!--                 <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
+                <classpath>
+                    <path refid="external.dependencies" />
+                    <pathelement location="${java.classes}" />
+                    <pathelement location="${scala.classes}" />
+                    <pathelement location="${java.contracts}" />
+                    <pathelement location="${java.test.classes}" />
+                    <pathelement location="${scala.test.classes}" />
+                </classpath>
+
+                <xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
+            </testng>
+
+            <fail message="test failed" if="test.failure" />
+        </sequential>
+    </macrodef>

    <!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
    <target name="test" depends="test.compile,tribble.test" description="Run unit tests">
@ -814,6 +858,22 @@
        <run-test testtype="${pipetype}"/>
    </target>

+    <target name="failed-test" depends="test.compile">
+        <run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" />
+    </target>
+
+    <target name="failed-integration" depends="test.compile">
+        <run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" />
+    </target>
+
+    <target name="failed-performance" depends="test.compile">
+        <run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" />
+    </target>
+
+    <target name="failed-pipeline" depends="test.compile">
+        <run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" />
+    </target>
+
    <!-- ***************************************************************************** -->
    <!-- ***********                     Tribble                             ********* -->
    <!-- ***************************************************************************** -->
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java
@ -26,16 +26,12 @@ package org.broadinstitute.sting.gatk.walkers.diffengine;

 import org.broad.tribble.readers.AsciiLineReader;
 import org.broad.tribble.readers.LineReader;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
+import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;

 import java.io.*;
-import java.util.Arrays;
 import java.util.Map;
-import java.util.zip.GZIPInputStream;


 /**
@ -58,7 +54,13 @@ public class VCFDiffableReader implements DiffableReader {
            VCFCodec vcfCodec = new VCFCodec();

            // must be read as state is stored in reader itself
-            vcfCodec.readHeader(lineReader);
+            VCFHeader header = (VCFHeader)vcfCodec.readHeader(lineReader);
+            for ( VCFHeaderLine headerLine : header.getMetaData() ) {
+                String key = headerLine.getKey();
+                if ( headerLine instanceof VCFNamedHeaderLine )
+                    key += "_" + ((VCFNamedHeaderLine) headerLine).getName();
+                root.add(key, headerLine.toString());
+            }

            String line = lineReader.readLine();
            int count = 0;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasingWalker.java
@ -242,7 +242,7 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
        return new PhasingStatsAndOutput(phaseStats, completedList);
    }

-    private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList("PQ"));
+    private static final Set<String> KEYS_TO_KEEP_IN_REDUCED_VCF = new HashSet<String>(Arrays.asList(PQ_KEY));

    private VariantContext reduceVCToSamples(VariantContext vc, List<String> samplesToPhase) {
 //        for ( String sample : samplesToPhase )
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
@ -7,6 +7,8 @@ import org.broad.tribble.NameAwareCodec;
 import org.broad.tribble.TribbleException;
 import org.broad.tribble.readers.LineReader;
 import org.broad.tribble.util.ParsingUtils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -96,6 +98,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
        for ( String str : headerStrings ) {
            if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
                String[] strings = str.substring(1).split(VCFConstants.FIELD_SEPARATOR);
+                if ( strings.length < VCFHeader.HEADER_FIELDS.values().length )
+                    throw new TribbleException.InvalidHeader("there are not enough columns present in the header line: " + str);
+
                int arrayIndex = 0;
                for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
                    try {
@ -159,12 +164,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
    }

    private Feature reallyDecode(String line) {
-        try {
            // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
            if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;

            // our header cannot be null, we need the genotype sample names and counts
-            if (header == null) throw new IllegalStateException("VCF Header cannot be null when decoding a record");
+            if (header == null) throw new ReviewedStingException("VCF Header cannot be null when decoding a record");

            if (parts == null)
                parts = new String[Math.min(header.getColumnCount(), NUM_STANDARD_FIELDS+1)];
@ -174,17 +178,18 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
            // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns.  Otherwise check that we have nine (normal colummns + genotyping data)
            if (( (header == null || (header != null && !header.hasGenotypingData())) && nParts != NUM_STANDARD_FIELDS) ||
                 (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
-                throw new IllegalArgumentException("There aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
-                        " tokens, and saw " + nParts + " )");
+                throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
+                        " tokens, and saw " + nParts + " )", lineNo);

            return parseVCFLine(parts);
-        } catch (TribbleException e) {
-            throw new TribbleException.InvalidDecodeLine(e.getMessage(), line);
-        }
    }

    protected void generateException(String message) {
-        throw new TribbleException.InvalidDecodeLine(message, lineNo);
+        throw new UserException.MalformedVCF(message, lineNo);
+    }
+
+    private static void generateException(String message, int lineNo) {
+        throw new UserException.MalformedVCF(message, lineNo);
    }

    /**
@ -472,10 +477,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
        return true;
    }

-    private static void generateException(String message, int lineNo) {
-        throw new TribbleException.InvalidDecodeLine(message, lineNo);
-    }
-
    private static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
        boolean clipping = true;
        // Note that the computation of forward clipping here is meant only to see whether there is a common
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@ -154,6 +154,16 @@ public class UserException extends ReviewedStingException {
        }
    }

+    public static class MalformedVCF extends UserException {
+        public MalformedVCF(String message, String line) {
+            super(String.format("The provided VCF file is malformed at line %s: %s", line, message));
+        }
+
+        public MalformedVCF(String message, int lineNo) {
+            super(String.format("The provided VCF file is malformed at line nmber %d: %s", lineNo, message));
+        }
+    }
+
    public static class ReadMissingReadGroup extends MalformedBAM {
        public ReadMissingReadGroup(SAMRecord read) {
            super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK.  Please use http://www.broadinstitute.org/gsa/wiki/index.php/ReplaceReadGroups to fix this problem", read.getReadName()));
--- a/public/java/test/org/broadinstitute/sting/WalkerTest.java
+++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java
@ -26,7 +26,9 @@
 package org.broadinstitute.sting;

 import org.apache.commons.lang.StringUtils;
+import org.broad.tribble.FeatureCodec;
 import org.broad.tribble.Tribble;
+import org.broad.tribble.index.Index;
 import org.broad.tribble.index.IndexFactory;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
 import org.broadinstitute.sting.gatk.CommandLineExecutable;
@ -64,10 +66,19 @@ public class WalkerTest extends BaseTest {
            }

            System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile);
-            Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(resultFile, indexFile, new VCFCodec()), "Index on disk from indexing on the fly not equal to the index created after the run completed");
+            Index indexFromOutputFile = IndexFactory.createIndex(resultFile, new VCFCodec());
+            Index dynamicIndex = IndexFactory.loadIndex(indexFile.getAbsolutePath());
+
+            if ( ! indexFromOutputFile.equals(dynamicIndex) ) {
+                Assert.fail(String.format("Index on disk from indexing on the fly not equal to the index created after the run completed.  FileIndex %s vs. on-the-fly %s%n",
+                        indexFromOutputFile.getProperties(),
+                        dynamicIndex.getProperties()));
+            }
        }
    }

+
+
    public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s) {
        List<String> md5s = new ArrayList<String>();
        for (int i = 0; i < resultFiles.size(); i++) {
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReaderUnitTest.java
@ -87,7 +87,7 @@ public class DiffableReaderUnitTest extends BaseTest {
        Assert.assertSame(diff.getParent(), DiffElement.ROOT);

        DiffNode node = diff.getValueAsNode();
-        Assert.assertEquals(node.getElements().size(), 9);
+        Assert.assertEquals(node.getElements().size(), 10);

        // chr1    2646    rs62635284      G       A       0.15    PASS    AC=2;AF=1.00;AN=2       GT:AD:DP:GL:GQ  1/1:53,75:3:-12.40,-0.90,-0.00:9.03
        DiffNode rec1 = node.getElement("chr1:2646").getValueAsNode();
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala
@ -3,14 +3,15 @@ package org.broadinstitute.sting.queue.qscripts
 import org.broadinstitute.sting.queue.extensions.gatk._
 import org.broadinstitute.sting.queue.QScript
 import org.broadinstitute.sting.queue.function.ListWriterFunction
-
-import scala.io.Source._
-import collection.JavaConversions._
-import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
 import org.broadinstitute.sting.queue.extensions.picard._
-import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord}
+import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
+import org.broadinstitute.sting.utils.baq.BAQ.CalculationMode
+
+import collection.JavaConversions._
+import net.sf.samtools.SAMFileReader
 import net.sf.samtools.SAMFileHeader.SortOrder

+import org.broadinstitute.sting.queue.util.QScriptUtils

 class DataProcessingPipeline extends QScript {
  qscript =>
@ -29,7 +30,8 @@ class DataProcessingPipeline extends QScript {
  @Input(doc="Reference fasta file", fullName="reference", shortName="R", required=true)
  var reference: File = _

-
+  @Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=true)
+  var dbSNP: File = _

  /****************************************************************************
  * Optional Parameters
@ -39,14 +41,12 @@ class DataProcessingPipeline extends QScript {
 //  @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
 //  var sortSamJar: File = _
 //
-  @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
-  var bwaPath: File = _
-
-  @Input(doc="dbsnp ROD to use (must be in VCF format)", fullName="dbsnp", shortName="D", required=false)
-  var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")

  @Input(doc="extra VCF files to use as reference indels for Indel Realignment", fullName="extra_indels", shortName="indels", required=false)
-  var indels: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/AFR+EUR+ASN+1KG.dindel_august_release_merged_pilot1.20110126.sites.vcf")
+  var indels: File = _
+
+  @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
+  var bwaPath: File = _

  @Input(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", fullName="project", shortName="p", required=false)
  var projectName: String = "project"
@ -103,18 +103,6 @@ class DataProcessingPipeline extends QScript {
                   val ds: String)
  {}

-  // Utility function to check if there are multiple samples in a BAM file (currently we can't deal with that)
-  def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
-    var sample: String = ""
-    for (r <- readGroups) {
-      if (sample.isEmpty)
-        sample = r.getSample
-      else if (sample != r.getSample)
-          return true;
-    }
-    return false
-  }
-
  // Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
  // It uses the sample information on the header of the input BAM files.
  //
@ -135,7 +123,7 @@ class DataProcessingPipeline extends QScript {

      // only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
      // with PrintReads to separate the samples. Tell user to do it himself!
-      assert(!hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
+      assert(!QScriptUtils.hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)

      // Fill out the sample table with the readgroups in this file
      for (rg <- readGroups) {
@ -166,12 +154,6 @@ class DataProcessingPipeline extends QScript {
    return sampleBamFiles.toMap
  }

-  // Checks how many contigs are in the dataset. Uses the BAM file header information.
-  def getNumberOfContigs(bamFile: File): Int = {
-    val samReader = new SAMFileReader(new File(bamFile))
-    return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
-  }
-
  // Rebuilds the Read Group string to give BWA
  def addReadGroups(inBam: File, outBam: File, samReader: SAMFileReader) {
    val readGroups = samReader.getFileHeader.getReadGroups
@ -215,19 +197,6 @@ class DataProcessingPipeline extends QScript {
    return realignedBams
  }

-  // Reads a BAM LIST file and creates a scala list with all the files
-  def createListFromFile(in: File):List[File] = {
-    if (in.toString.endsWith("bam"))
-      return List(in)
-    var l: List[File] = List()
-    for (bam <- fromFile(in).getLines) {
-      if (!bam.startsWith("#") && !bam.isEmpty)
-        l :+= new File(bam.trim)
-    }
-    return l
-  }
-
-

  /****************************************************************************
  * Main script
@ -237,8 +206,8 @@ class DataProcessingPipeline extends QScript {
  def script = {

    // keep a record of the number of contigs in the first bam file in the list
-    val bams = createListFromFile(input)
-    nContigs = getNumberOfContigs(bams(0))
+    val bams = QScriptUtils.createListFromFile(input)
+    nContigs = QScriptUtils.getNumberOfContigs(bams(0))

    val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}

@ -319,7 +288,8 @@ class DataProcessingPipeline extends QScript {
    this.out = outIntervals
    this.mismatchFraction = 0.0
    this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
-    this.rodBind :+= RodBind("indels", "VCF", indels)
+    if (!indels.isEmpty)
+      this.rodBind :+= RodBind("indels", "VCF", indels)
    this.scatterCount = nContigs
    this.analysisName = queueLogDir + outIntervals + ".target"
    this.jobName = queueLogDir + outIntervals + ".target"
@ -330,7 +300,8 @@ class DataProcessingPipeline extends QScript {
    this.targetIntervals = tIntervals
    this.out = outBam
    this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
-    this.rodBind :+= RodBind("indels", "VCF", qscript.indels)
+    if (!indels.isEmpty)
+      this.rodBind :+= RodBind("indels", "VCF", indels)
    this.consensusDeterminationModel =  consensusDeterminationModel
    this.compress = 0
    this.scatterCount = nContigs
@ -353,7 +324,7 @@ class DataProcessingPipeline extends QScript {
  case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
    this.input_file :+= inBam
    this.recal_file = inRecalFile
-    this.baq = org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY
+    this.baq = CalculationMode.CALCULATE_AS_NECESSARY
    this.out = outBam
    if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
    else if (qscript.intervals != null) this.intervals :+= qscript.intervals
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala
@ -2,7 +2,7 @@ package org.broadinstitute.sting.queue.qscripts

 import org.broadinstitute.sting.queue.QScript
 import org.broadinstitute.sting.queue.extensions.gatk._
-import net.sf.samtools.SAMFileReader
+import org.broadinstitute.sting.queue.util.QScriptUtils

 /**
 * Created by IntelliJ IDEA.
@ -32,26 +32,25 @@ class RecalibrateBaseQualities extends QScript {
  val queueLogDir: String = ".qlog/"
  var nContigs: Int = 0

-  def getNumberOfContigs(bamFile: File): Int = {
-    val samReader = new SAMFileReader(new File(bamFile))
-    return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
-  }
-
  def script = {

-    nContigs = getNumberOfContigs(input)
+    val bamList = QScriptUtils.createListFromFile(input)
+    nContigs = QScriptUtils.getNumberOfContigs(bamList(0))

-    val recalFile1: File = swapExt(input, ".bam", ".recal1.csv")
-    val recalFile2: File = swapExt(input, ".bam", ".recal2.csv")
-    val recalBam: File   = swapExt(input, ".bam", ".recal.bam")
-    val path1: String    = input + "before"
-    val path2: String    = input + "after"
-    
-    add(cov(input, recalFile1),
-        recal(input, recalFile1, recalBam),
-        cov(recalBam, recalFile2),
-        analyzeCovariates(recalFile1, path1),
-        analyzeCovariates(recalFile2, path2))
+    for (bam <- bamList) {
+
+      val recalFile1: File = swapExt(bam, ".bam", ".recal1.csv")
+      val recalFile2: File = swapExt(bam, ".bam", ".recal2.csv")
+      val recalBam: File   = swapExt(bam, ".bam", ".recal.bam")
+      val path1: String    = bam + ".before"
+      val path2: String    = bam + ".after"
+
+      add(cov(bam, recalFile1),
+          recal(bam, recalFile1, recalBam),
+          cov(recalBam, recalFile2),
+          analyzeCovariates(recalFile1, path1),
+          analyzeCovariates(recalFile2, path2))
+    }
  }

  trait CommandLineGATKArgs extends CommandLineGATK {
@ -84,7 +83,7 @@ class RecalibrateBaseQualities extends QScript {
  case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
    this.resources = R
    this.recal_file = inRecalFile
-    this.output_dir = outPath.toString
+    this.output_dir = outPath
    this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
    this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
  }
--- a/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/QScriptUtils.scala
@ -0,0 +1,60 @@
+package org.broadinstitute.sting.queue.util
+
+import java.io.File
+import io.Source._
+import net.sf.samtools.{SAMReadGroupRecord, SAMFileReader}
+
+import collection.JavaConversions._
+
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: carneiro
+ * Date: 7/14/11
+ * Time: 4:57 PM
+ * To change this template use File | Settings | File Templates.
+ */
+
+object QScriptUtils {
+
+  /**
+   * Takes a bam list file and produces a scala list with each file allowing the bam list
+   * to have empty lines and comment lines (lines starting with #).
+   */
+  def createListFromFile(in: File):List[File] = {
+    // If the file provided ends with .bam, it is not a bam list, we treat it as a single file.
+    // and return a list with only this file.
+    if (in.toString.endsWith(".bam"))
+      return List(in)
+
+    var list: List[File] = List()
+    for (bam <- fromFile(in).getLines)
+      if (!bam.startsWith("#") && !bam.isEmpty )
+        list :+= new File(bam.trim())
+    list
+  }
+
+  /**
+   * Returns the number of contigs in the BAM file header.
+   */
+  def getNumberOfContigs(bamFile: File): Int = {
+    val samReader = new SAMFileReader(bamFile)
+    samReader.getFileHeader.getSequenceDictionary.getSequences.size()
+  }
+
+  /**
+   * Check if there are multiple samples in a BAM file
+   */
+  def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
+    var sample: String = ""
+    for (r <- readGroups) {
+      if (sample.isEmpty)
+        sample = r.getSample
+      else if (sample != r.getSample)
+          return true;
+    }
+    false
+  }
+
+
+}
--- a/settings/repository/org.broad/tribble-4.jar
+++ b/settings/repository/org.broad/tribble-4.jar
--- a/settings/repository/org.broad/tribble-4.xml
+++ b/settings/repository/org.broad/tribble-4.xml
@ -1,4 +1,4 @@
 <ivy-module version="1.0">
-    <info organisation="org.broad" module="tribble" revision="3" 
+    <info organisation="org.broad" module="tribble" revision="4" 
          status="integration" publication="" />
 </ivy-module>