From 73f9d1f2174a54ee2fd6d796bfefdd8d43540ebf Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 6 Oct 2011 08:40:35 -0700 Subject: [PATCH] GATK read group requirement iron hand -- The GATK will now throw a user exception if it opens a SAM/BAM file that doesn't have at least one RG defined -- LIBS again throws an error if the complete list of samples isn't provided -- Updating ExampleCountLociPipeline test to use the well-formatted versions of the exampleBAM and exampleFASTA files in testdata, instead of the old broken ones in validation_data. -- Convenience constructors for UserException.MalformedBAM --- .../sting/gatk/datasources/reads/SAMDataSource.java | 6 ++++++ .../sting/gatk/iterators/LocusIteratorByState.java | 5 +---- .../sting/utils/exceptions/UserException.java | 10 +++++++++- .../examples/ExampleCountLociPipelineTest.scala | 4 ++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 572970349..74d39ecb0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -235,6 +235,12 @@ public class SAMDataSource { for(SAMFileReader reader: readers.values()) { // Get the sort order, forcing it to coordinate if unsorted. SAMFileHeader header = reader.getFileHeader(); + + if ( header.getReadGroups().isEmpty() ) { + throw new UserException.MalformedBAM(readers.getReaderID(reader).samFile, + "SAM file doesn't have any read groups defined in the header. The GATK no longer supports SAM files without read groups"); + } + SAMFileHeader.SortOrder sortOrder = header.getSortOrder() != SAMFileHeader.SortOrder.unsorted ? header.getSortOrder() : SAMFileHeader.SortOrder.coordinate; // Validate that all input files are sorted in the same order. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index eb5b51b33..2f3652d6a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -283,10 +283,7 @@ public class LocusIteratorByState extends LocusIterator { // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when // there's no read data. So we need to throw this error only when samIterator.hasNext() is true if ( this.samples.isEmpty() && samIterator.hasNext() ) { - // actually we cannot process BAMs without read groups unless we tolerate empty - // sample lists. In the empty case we need to add the null element to the samples - this.samples.add(null); - //throw new IllegalArgumentException("samples list must not be empty"); + throw new IllegalArgumentException("samples list must not be empty"); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 77f1ed6c0..1dea726ae 100755 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -160,7 +160,15 @@ public class UserException extends ReviewedStingException { public static class MalformedBAM extends UserException { public MalformedBAM(SAMRecord read, String message) { - super(String.format("SAM/BAM file %s is malformed: %s", read.getFileSource() != null ? read.getFileSource().getReader() : "(none)", message)); + this(read.getFileSource() != null ? 
read.getFileSource().getReader().toString() : "(none)", message); + } + + public MalformedBAM(File file, String message) { + this(file.toString(), message); + } + + public MalformedBAM(String source, String message) { + super(String.format("SAM/BAM file %s is malformed: %s", source, message)); } } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala index 1e6c93cff..5901cab46 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala @@ -36,8 +36,8 @@ class ExampleCountLociPipelineTest { spec.name = "countloci" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala", - " -R " + BaseTest.hg18Reference, - " -I " + BaseTest.validationDataLocation + "small_bam_for_countloci.bam", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -I " + BaseTest.testDir + "exampleBAM.bam", " -o " + testOut).mkString spec.fileMD5s += testOut -> "67823e4722495eb10a5e4c42c267b3a6" PipelineTest.executeTest(spec)