Optimize creation of reads in ArtificialBAMBuilder

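-- Now caches the created reads so that subsequent calls to makeReads() don't reallocate them from scratch each time; the cache is reset whenever the header, the additional reads, skipNLoci, alignmentStart or readLength is changed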
This commit is contained in:
Mark DePristo 2013-01-18 16:52:49 -05:00
parent e050f649fd
commit ce160931d5
1 changed file with 32 additions and 20 deletions

View File

@ -62,6 +62,7 @@ public class ArtificialBAMBuilder {
int alignmentStart = 1; int alignmentStart = 1;
int readLength = 10; int readLength = 10;
private final ArrayList<String> samples = new ArrayList<String>(); private final ArrayList<String> samples = new ArrayList<String>();
private List<GATKSAMRecord> createdReads = null;
private LinkedList<GATKSAMRecord> additionalReads = new LinkedList<GATKSAMRecord>(); private LinkedList<GATKSAMRecord> additionalReads = new LinkedList<GATKSAMRecord>();
@ -102,6 +103,7 @@ public class ArtificialBAMBuilder {
} }
public ArtificialBAMBuilder createAndSetHeader(final int nSamples) { public ArtificialBAMBuilder createAndSetHeader(final int nSamples) {
createdReads = null;
this.header = new SAMFileHeader(); this.header = new SAMFileHeader();
header.setSortOrder(SAMFileHeader.SortOrder.coordinate); header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
header.setSequenceDictionary(parser.getContigs()); header.setSequenceDictionary(parser.getContigs());
@ -120,10 +122,12 @@ public class ArtificialBAMBuilder {
} }
public void addReads(final GATKSAMRecord readToAdd) { public void addReads(final GATKSAMRecord readToAdd) {
createdReads = null;
additionalReads.add(readToAdd); additionalReads.add(readToAdd);
} }
public void addReads(final Collection<GATKSAMRecord> readsToAdd) { public void addReads(final Collection<GATKSAMRecord> readsToAdd) {
createdReads = null;
additionalReads.addAll(readsToAdd); additionalReads.addAll(readsToAdd);
} }
@ -140,26 +144,34 @@ public class ArtificialBAMBuilder {
* @return an ordered list of reads * @return an ordered list of reads
*/ */
public List<GATKSAMRecord> makeReads() { public List<GATKSAMRecord> makeReads() {
final String baseName = "read"; if ( createdReads == null ) {
List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>(nReadsPerLocus*nLoci); final String baseName = "read";
for ( int locusI = 0; locusI < nLoci; locusI++) { final LinkedList<GATKSAMReadGroupRecord> readGroups = new LinkedList<GATKSAMReadGroupRecord>();
final int locus = locusI * (skipNLoci + 1); for ( final SAMReadGroupRecord rg : header.getReadGroups())
for ( int readI = 0; readI < nReadsPerLocus; readI++ ) { readGroups.add(new GATKSAMReadGroupRecord(rg));
for ( final SAMReadGroupRecord rg : header.getReadGroups() ) {
final String readName = String.format("%s.%d.%d.%s", baseName, locus, readI, rg.getId()); List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>(nReadsPerLocus*nLoci);
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, readName, 0, alignmentStart + locus, readLength); for ( int locusI = 0; locusI < nLoci; locusI++) {
read.setReadGroup(new GATKSAMReadGroupRecord(rg)); final int locus = locusI * (skipNLoci + 1);
reads.add(read); for ( int readI = 0; readI < nReadsPerLocus; readI++ ) {
for ( final GATKSAMReadGroupRecord rg : readGroups ) {
final String readName = String.format("%s.%d.%d.%s", baseName, locus, readI, rg.getId());
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, readName, 0, alignmentStart + locus, readLength);
read.setReadGroup(rg);
reads.add(read);
}
} }
} }
if ( ! additionalReads.isEmpty() ) {
reads.addAll(additionalReads);
Collections.sort(reads, new SAMRecordCoordinateComparator());
}
createdReads = new ArrayList<GATKSAMRecord>(reads);
} }
if ( ! additionalReads.isEmpty() ) { return createdReads;
reads.addAll(additionalReads);
Collections.sort(reads, new SAMRecordCoordinateComparator());
}
return reads;
} }
/** /**
@ -192,13 +204,13 @@ public class ArtificialBAMBuilder {
public int getnReadsPerLocus() { return nReadsPerLocus; } public int getnReadsPerLocus() { return nReadsPerLocus; }
public int getnLoci() { return nLoci; } public int getnLoci() { return nLoci; }
public int getSkipNLoci() { return skipNLoci; } public int getSkipNLoci() { return skipNLoci; }
public ArtificialBAMBuilder setSkipNLoci(int skipNLoci) { this.skipNLoci = skipNLoci; return this; } public ArtificialBAMBuilder setSkipNLoci(int skipNLoci) { this.skipNLoci = skipNLoci; createdReads = null; return this; }
public int getAlignmentStart() { return alignmentStart; } public int getAlignmentStart() { return alignmentStart; }
public ArtificialBAMBuilder setAlignmentStart(int alignmentStart) { this.alignmentStart = alignmentStart; return this; } public ArtificialBAMBuilder setAlignmentStart(int alignmentStart) { this.alignmentStart = alignmentStart; createdReads = null; return this; }
public int getReadLength() { return readLength; } public int getReadLength() { return readLength; }
public ArtificialBAMBuilder setReadLength(int readLength) { this.readLength = readLength; return this; } public ArtificialBAMBuilder setReadLength(int readLength) { this.readLength = readLength; createdReads = null; return this; }
public SAMFileHeader getHeader() { return header; } public SAMFileHeader getHeader() { return header; }
public ArtificialBAMBuilder setHeader(SAMFileHeader header) { this.header = header; return this; } public ArtificialBAMBuilder setHeader(SAMFileHeader header) { this.header = header; createdReads = null; return this; }
public int getAlignmentEnd() { public int getAlignmentEnd() {
return alignmentStart + nLoci * (skipNLoci + 1) + readLength; return alignmentStart + nLoci * (skipNLoci + 1) + readLength;