From 9e107513d03e4ba245c85b2d5ee0b4eef14d58b8 Mon Sep 17 00:00:00 2001 From: hanna Date: Sun, 7 Mar 2010 23:01:34 +0000 Subject: [PATCH] In the new sharding system, if no read group is present, hallucinate one. Added for test compatibility, but not sure whether we still need this feature. TODO: Poll the group about this feature. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2949 348d0f76-0448-11de-a6fe-93d51630548a --- build.xml | 2 +- .../BlockDrivenSAMDataSource.java | 14 ++++++++ .../IndexDrivenSAMDataSource.java | 2 +- .../simpleDataSources/SAMDataSource.java | 3 +- ...rator.java => ReadFormattingIterator.java} | 34 ++++++++++++++++--- 5 files changed, 46 insertions(+), 9 deletions(-) rename java/src/org/broadinstitute/sting/gatk/iterators/{ReadWrappingIterator.java => ReadFormattingIterator.java} (58%) diff --git a/build.xml b/build.xml index 584e0808c..d95553efc 100644 --- a/build.xml +++ b/build.xml @@ -305,8 +305,8 @@ - + diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java index 9934d7e1f..fcc5d6575 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/BlockDrivenSAMDataSource.java @@ -415,6 +415,20 @@ public class BlockDrivenSAMDataSource extends SAMDataSource { for(File readsFile: sourceInfo.getReadsFiles()) { SAMFileReader2 reader = new SAMFileReader2(readsFile,true); reader.setValidationStringency(sourceInfo.getValidationStringency()); + + // If no read group is present, hallucinate one. + // TODO: Straw poll to see whether this is really required. 
+ final SAMFileHeader header = reader.getFileHeader(); + logger.debug(String.format("Sort order is: %s", header.getSortOrder())); + + if (header.getReadGroups().isEmpty()) { + SAMReadGroupRecord rec = new SAMReadGroupRecord(readsFile.getName()); + rec.setLibrary(readsFile.getName()); + rec.setSample(readsFile.getName()); + + header.addReadGroup(rec); + } + readers.put(new SAMReaderID(readsFile),reader); } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java index ecea7939e..b99be3fff 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/IndexDrivenSAMDataSource.java @@ -415,7 +415,7 @@ public class IndexDrivenSAMDataSource extends SAMDataSource { private StingSAMIterator createIterator( DataStreamSegment segment ) { StingSAMIterator iterator = resourcePool.iterator(segment); StingSAMIterator malformedWrappedIterator = new MalformedSAMFilteringIterator( getHeader(), iterator, violations ); - StingSAMIterator readWrappingIterator = new ReadWrappingIterator(malformedWrappedIterator); + StingSAMIterator readWrappingIterator = new ReadFormattingIterator(malformedWrappedIterator); return readWrappingIterator; } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java index 96ba4b54f..052279c44 100755 --- a/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/simpleDataSources/SAMDataSource.java @@ -12,7 +12,6 @@ import org.broadinstitute.sting.utils.sam.SAMReadViolationHistogram; import java.io.File; import 
java.util.Collection; -import java.util.Map; import java.util.List; import java.util.ArrayList; @@ -182,7 +181,7 @@ public abstract class SAMDataSource implements SimpleDataSource { Boolean noValidationOfReadOrder, Collection supplementalFilters) { wrappedIterator = new MalformedSAMFilteringIterator(getHeader(),wrappedIterator,violations ); - wrappedIterator = new ReadWrappingIterator(wrappedIterator); + wrappedIterator = new ReadFormattingIterator(wrappedIterator); // NOTE: this (and other filtering) should be done before on-the-fly sorting // as there is no reason to sort something that we will end of throwing away diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/ReadWrappingIterator.java b/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java similarity index 58% rename from java/src/org/broadinstitute/sting/gatk/iterators/ReadWrappingIterator.java rename to java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java index c11084e95..36d89cda5 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/ReadWrappingIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/ReadFormattingIterator.java @@ -1,17 +1,27 @@ package org.broadinstitute.sting.gatk.iterators; import net.sf.samtools.SAMRecord; +import net.sf.samtools.SAMTag; +import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.Reads; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.apache.log4j.Logger; + +import java.util.List; /** - * An iterator which wraps each SAMRecord inside a wrapper class, bringing new functionality to the read while - * presenting the original SAMRecord interface. + * An iterator which does post-processing of a read, including potentially wrapping + * the read in something with a compatible interface or replacing the read entirely. 
* * @author mhanna * @version 0.1 */ -public class ReadWrappingIterator implements StingSAMIterator { +public class ReadFormattingIterator implements StingSAMIterator { + /** + * Logger. + */ + protected static Logger logger = Logger.getLogger(ReadFormattingIterator.class); + /** * Iterator to which to pass */ @@ -21,7 +31,7 @@ public class ReadWrappingIterator implements StingSAMIterator { * Decorate the given iterator inside a ReadWrappingIterator. * @param wrappedIterator iterator */ - public ReadWrappingIterator(StingSAMIterator wrappedIterator) { + public ReadFormattingIterator(StingSAMIterator wrappedIterator) { this.wrappedIterator = wrappedIterator; } @@ -65,7 +75,21 @@ public class ReadWrappingIterator implements StingSAMIterator { * no next exists. */ public SAMRecord next() { - return new GATKSAMRecord(wrappedIterator.next()); + SAMRecord read = wrappedIterator.next(); + + // if we don't have a read group, set one. + // TODO: Straw poll to see whether this is really required. + if (read.getAttribute(SAMTag.RG.toString()) == null && read.getReader() != null) { + List readGroups = read.getReader().getFileHeader().getReadGroups(); + if (readGroups.size() == 1) { + read.setAttribute(SAMTag.RG.toString(), readGroups.get(0).getReadGroupId()); + read.setAttribute(SAMTag.SM.toString(), readGroups.get(0).getReadGroupId()); + } else { + logger.warn("Unable to set read group of ungrouped read: unable to pick default group, there are " + readGroups.size() + " possible."); + } + } + + return new GATKSAMRecord(read); } /**