From bd4cacb832bc73abfbfd35fd6a228a1fa86d45da Mon Sep 17 00:00:00 2001 From: aaron Date: Fri, 24 Apr 2009 00:31:00 +0000 Subject: [PATCH] Added code to make a read group and sample name for BAM files that don't annotate them on reads. The defaults for both are now the filename, but this may be shortened in the future. The sample name for a read can be retrieved with the command: read.getAttribute(SAMTag.RG.toString()); git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@518 348d0f76-0448-11de-a6fe-93d51630548a --- .../dataSources/simpleDataSources/SAMDataSource.java | 12 +++++++++++- .../sting/gatk/executive/MicroManager.java | 1 - .../gatk/iterators/MergingSamRecordIterator2.java | 12 ++++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java index b1633edcb..52548dfa7 100755 --- a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java +++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.dataSources.simpleDataSources; import edu.mit.broad.picard.sam.SamFileHeaderMerger; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator; @@ -253,7 +254,6 @@ public class SAMDataSource implements SimpleDataSource { this.readsAtLastPos = 1; } lastPos = rec.getAlignmentStart(); - //System.out.print("t" + rec.getAlignmentStart() + " "); ++x; } else { // jump contigs @@ -310,6 +310,16 @@ public class SAMDataSource implements SimpleDataSource { List lst = new ArrayList(); for (File f : this.samFileList) { SAMFileReader reader = initializeSAMFile(f); + + if (reader.getFileHeader().getReadGroups().size() < 1) { + logger.warn("Setting header in reader " + f.getName()); + SAMReadGroupRecord rec = new SAMReadGroupRecord(f.getName()); + rec.setLibrary(f.getName()); + rec.setSample(f.getName()); + + reader.getFileHeader().addReadGroup(rec); + } + if (reader == null) { throw new SimpleDataSourceLoadException("SAMDataSource: Unable to load file: " + f); } diff --git a/java/src/org/broadinstitute/sting/gatk/executive/MicroManager.java b/java/src/org/broadinstitute/sting/gatk/executive/MicroManager.java index 36cdb09aa..802423e5f 100644 --- a/java/src/org/broadinstitute/sting/gatk/executive/MicroManager.java +++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroManager.java @@ -128,7 +128,6 @@ public class MicroManager { walkerInitialized = true; } - System.err.println(traversalEngine.getSAMHeader().getSequenceDictionary().toString()); accumulator = traversalEngine.traverse( walker, shard, referenceProvider, locusProvider, accumulator ); readShard.close(); } diff --git a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java index 8dff73791..44145f562 100644 --- a/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java +++ b/java/src/org/broadinstitute/sting/gatk/iterators/MergingSamRecordIterator2.java @@ -16,10 +16,12 @@ import edu.mit.broad.picard.sam.SamFileHeaderMerger; import edu.mit.broad.picard.util.PeekableIterator; import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; +import org.apache.log4j.Logger; import java.lang.reflect.Constructor; import java.util.Comparator; import java.util.Iterator; +import java.util.List; import java.util.PriorityQueue; /** @@ -31,6 +33,7 @@ public class MergingSamRecordIterator2 implements CloseableIterator, protected PriorityQueue pq = null; protected final SamFileHeaderMerger samHeaderMerger; protected final SAMFileHeader.SortOrder sortOrder; + protected static Logger logger = Logger.getLogger(MergingSamRecordIterator2.class); protected boolean initialized = false; @@ -135,11 +138,20 @@ public class MergingSamRecordIterator2 implements CloseableIterator, if (oldProgramGroupId != null) { final String newProgramGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader(), oldProgramGroupId); record.setAttribute(SAMTag.PG.toString(), newProgramGroupId); + } else { + List readGroups = iterator.getReader().getFileHeader().getReadGroups(); + if (readGroups.size() == 1) { + record.setAttribute(SAMTag.RG.toString(), readGroups.get(0).getReadGroupId()); + record.setAttribute(SAMTag.SM.toString(), readGroups.get(0).getReadGroupId()); + } else { + logger.warn("Unable to set read group of ungrouped read: unable to pick default group, there are " + readGroups.size() + " possible."); + } } record.setHeader(samHeaderMerger.getMergedHeader()); //System.out.printf("NEXT = %s %s %d%n", record.getReadName(), record.getReferenceName(), record.getAlignmentStart()); //System.out.printf("PEEK = %s %s %d%n", this.pq.peek().peek().getReadName(), this.pq.peek().peek().getReferenceName(), this.pq.peek().peek().getAlignmentStart()); + return record; }