diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 4884452d2..52544fbd2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -712,100 +712,6 @@ public class GenomeAnalysisEngine { return getReadsDataSource().getSAMFile(id); } - /** - * Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying - * individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list - * returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names - * found in the corresponding bam file. - * - * @return Sets of samples in the merged input SAM stream, grouped by readers - */ - public List> getSamplesByReaders() { - Collection readers = getReadsDataSource().getReaderIDs(); - - List> sample_sets = new ArrayList>(readers.size()); - - for (SAMReaderID r : readers) { - - Set samples = new HashSet(1); - sample_sets.add(samples); - - for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) { - samples.add(g.getSample()); - } - } - - return sample_sets; - - } - - /** - * Returns sets of libraries present in the (merged) input SAM stream, grouped by readers (i.e. underlying - * individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list - * returned by this method will contain 3 elements (one for each reader), with each element being a set of library names - * found in the corresponding bam file. - * - * @return Sets of libraries present in the (merged) input SAM stream, grouped by readers - */ - public List> getLibrariesByReaders() { - - - Collection readers = getReadsDataSource().getReaderIDs(); - - List> lib_sets = new ArrayList>(readers.size()); - - for (SAMReaderID r : readers) { - - Set libs = new HashSet(2); - lib_sets.add(libs); - - for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) { - libs.add(g.getLibrary()); - } - } - - return lib_sets; - - } - - /** - * **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD **** - * - * Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying - * individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list - * returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups - * (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file. - * - * @return sets of (merged) read group ids in order of input bams - */ - public List> getMergedReadGroupsByReaders() { - - - Collection readers = getReadsDataSource().getReaderIDs(); - - List> rg_sets = new ArrayList>(readers.size()); - - for (SAMReaderID r : readers) { - - Set groups = new HashSet(5); - rg_sets.add(groups); - - for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) { - if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so: - // use HeaderMerger to translate original read group id from the reader into the read group id in the - // merged stream, and save that remapped read group id to associate it with specific reader - groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId())); - } else { - // otherwise, pass through the unmapped read groups since this is what Picard does as well - groups.add(g.getReadGroupId()); - } - } - } - - return rg_sets; - - } - /** * Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vrs the reference ordered data (if available). * @@ -925,6 +831,18 @@ public class GenomeAnalysisEngine { return readsDataSource.getHeader(reader); } + /** + * Returns an ordered list of the unmerged SAM file headers known to this engine. + * @return list of header for each input SAM file, in command line order + */ + public List getSAMFileHeaders() { + final List headers = new ArrayList(); + for ( final SAMReaderID id : getReadsDataSource().getReaderIDs() ) { + headers.add(getReadsDataSource().getHeader(id)); + } + return headers; + } + /** * Gets the master sequence dictionary for this GATK engine instance * @return a never-null dictionary listing all of the contigs known to this engine instance @@ -943,8 +861,6 @@ public class GenomeAnalysisEngine { return this.readsDataSource; } - - /** * Sets the collection of GATK main application arguments. * diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java index 664c319ab..3168d9a6b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec; import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; @@ -281,20 +282,14 @@ public class DepthOfCoverageWalker extends LocusWalker getSamplesFromToolKit(DoCOutputType.Partition type) { HashSet partition = new HashSet(); if ( type == DoCOutputType.Partition.sample ) { - for ( Set sampleSet : getToolkit().getSamplesByReaders() ) { - for ( String s : sampleSet ) { - partition.add(s); - } - } + partition.addAll(SampleUtils.getSAMFileSamples(getToolkit())); } else if ( type == DoCOutputType.Partition.readgroup ) { for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) { partition.add(rg.getSample()+"_rg_"+rg.getReadGroupId()); } } else if ( type == DoCOutputType.Partition.library ) { - for ( Set libraries : getToolkit().getLibrariesByReaders() ) { - for ( String l : libraries ) { - partition.add(l); - } + for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) { + partition.add(rg.getLibrary()); } } else if ( type == DoCOutputType.Partition.center ) { for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java index 8bba8eac2..84cb69b07 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java @@ -392,7 +392,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker { location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1); - normalSamples = getToolkit().getSamplesByReaders().get(0); + normalSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeaders().get(0)); try { // we already checked that bedOutput and output_file are not set simultaneously