Removed getXByReaders() function from the engine

-- These could be simplified in their downstream uses
-- Or they could be replaced with a generic getSAMFileHeaders() function, with getSamples(header) then applied as desired downstream
This commit is contained in:
Mark DePristo 2011-09-30 10:43:51 -04:00
parent 178ba24c27
commit 810e8ad011
3 changed files with 17 additions and 106 deletions

View File

@ -712,100 +712,6 @@ public class GenomeAnalysisEngine {
return getReadsDataSource().getSAMFile(id);
}
/**
* Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names
* found in the corresponding bam file.
*
* @return Sets of samples in the merged input SAM stream, grouped by readers
*/
public List<Set<String>> getSamplesByReaders() {
    // One result entry per reader, in the iteration order of the reader IDs.
    final Collection<SAMReaderID> readerIDs = getReadsDataSource().getReaderIDs();
    final List<Set<String>> samplesPerReader = new ArrayList<Set<String>>(readerIDs.size());

    for ( final SAMReaderID readerID : readerIDs ) {
        // Collect the sample name of every read group declared in this reader's header.
        final Set<String> sampleNames = new HashSet<String>(1);
        for ( final SAMReadGroupRecord readGroup : getReadsDataSource().getHeader(readerID).getReadGroups() ) {
            sampleNames.add(readGroup.getSample());
        }
        samplesPerReader.add(sampleNames);
    }

    return samplesPerReader;
}
/**
* Returns sets of libraries present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of library names
* found in the corresponding bam file.
*
* @return Sets of libraries present in the (merged) input SAM stream, grouped by readers
*/
public List<Set<String>> getLibrariesByReaders() {
    // One result entry per reader, in the iteration order of the reader IDs.
    final Collection<SAMReaderID> readerIDs = getReadsDataSource().getReaderIDs();
    final List<Set<String>> librariesPerReader = new ArrayList<Set<String>>(readerIDs.size());

    for ( final SAMReaderID readerID : readerIDs ) {
        // Collect the library name of every read group declared in this reader's header.
        final Set<String> libraryNames = new HashSet<String>(2);
        for ( final SAMReadGroupRecord readGroup : getReadsDataSource().getHeader(readerID).getReadGroups() ) {
            libraryNames.add(readGroup.getLibrary());
        }
        librariesPerReader.add(libraryNames);
    }

    return librariesPerReader;
}
/**
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
*
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups
* (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file.
*
* @return sets of (merged) read group ids in order of input bams
*/
public List<Set<String>> getMergedReadGroupsByReaders() {
    // One result entry per reader, in the iteration order of the reader IDs.
    final Collection<SAMReaderID> readerIDs = getReadsDataSource().getReaderIDs();
    final List<Set<String>> readGroupsPerReader = new ArrayList<Set<String>>(readerIDs.size());

    for ( final SAMReaderID readerID : readerIDs ) {
        final Set<String> mergedIds = new HashSet<String>(5);
        for ( final SAMReadGroupRecord readGroup : getReadsDataSource().getHeader(readerID).getReadGroups() ) {
            // If read group ids collided across the inputs, ask the reads data source to translate
            // this reader's original id into the id used in the merged stream; otherwise pass the
            // original id through unchanged (the original author notes this matches what Picard does).
            final String mergedId = getReadsDataSource().hasReadGroupCollisions()
                    ? getReadsDataSource().getReadGroupId(readerID, readGroup.getReadGroupId())
                    : readGroup.getReadGroupId();
            mergedIds.add(mergedId);
        }
        readGroupsPerReader.add(mergedIds);
    }

    return readGroupsPerReader;
}
/**
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vs. the reference ordered data (if available).
*
@ -925,6 +831,18 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader(reader);
}
/**
* Returns an ordered list of the unmerged SAM file headers known to this engine.
* @return list of headers, one for each input SAM file, in command line order
*/
public List<SAMFileHeader> getSAMFileHeaders() {
    // Look up each reader's own header, following the iteration order of the reader IDs.
    final List<SAMFileHeader> unmergedHeaders = new ArrayList<SAMFileHeader>();
    for ( final SAMReaderID readerID : getReadsDataSource().getReaderIDs() ) {
        final SAMFileHeader readerHeader = getReadsDataSource().getHeader(readerID);
        unmergedHeaders.add(readerHeader);
    }
    return unmergedHeaders;
}
/**
* Gets the master sequence dictionary for this GATK engine instance
* @return a never-null dictionary listing all of the contigs known to this engine instance
@ -943,8 +861,6 @@ public class GenomeAnalysisEngine {
return this.readsDataSource;
}
/**
* Sets the collection of GATK main application arguments.
*

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
@ -281,20 +282,14 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
private HashSet<String> getSamplesFromToolKit(DoCOutputType.Partition type) {
HashSet<String> partition = new HashSet<String>();
if ( type == DoCOutputType.Partition.sample ) {
for ( Set<String> sampleSet : getToolkit().getSamplesByReaders() ) {
for ( String s : sampleSet ) {
partition.add(s);
}
}
partition.addAll(SampleUtils.getSAMFileSamples(getToolkit()));
} else if ( type == DoCOutputType.Partition.readgroup ) {
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
partition.add(rg.getSample()+"_rg_"+rg.getReadGroupId());
}
} else if ( type == DoCOutputType.Partition.library ) {
for ( Set<String> libraries : getToolkit().getLibrariesByReaders() ) {
for ( String l : libraries ) {
partition.add(l);
}
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
partition.add(rg.getLibrary());
}
} else if ( type == DoCOutputType.Partition.center ) {
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {

View File

@ -392,7 +392,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
normalSamples = getToolkit().getSamplesByReaders().get(0);
normalSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeaders().get(0));
try {
// we already checked that bedOutput and output_file are not set simultaneously