Merge pull request #47 from broadinstitute/aw_reduceread_perf_1_GSA-761

Reduce memory footprint of SyntheticRead by replacing several Lists with a single List of SingleBaseInfo objects.
This commit is contained in:
Eric Banks 2013-02-20 04:49:07 -08:00
commit 551d33686c
2 changed files with 101 additions and 45 deletions

View File

@ -69,6 +69,12 @@ public enum BaseIndex {
public byte getByte() { return b; } public byte getByte() { return b; }
/**
 * Returns this constant's ordinal narrowed to a {@code byte}.
 * SyntheticRead stores the ordinal as a byte instead of holding a reference to the enum constant,
 * which keeps each per-base record compact; this accessor simply spares callers the cast.
 */
public byte getOrdinalByte() {
    final int position = ordinal();
    return (byte) position;
}
private BaseIndex(char base, int index) { private BaseIndex(char base, int index) {
this.b = (byte)base; this.b = (byte)base;
this.index = index; this.index = index;

View File

@ -76,11 +76,54 @@ import java.util.List;
* @since 8/26/11 * @since 8/26/11
*/ */
public class SyntheticRead { public class SyntheticRead {
private List<BaseIndex> bases; // Rather than storing a separate list for each attribute in SingleBaseInfo, store one list to reduce
private List<Byte> counts; // memory footprint.
private List<Byte> quals; // TODO: better name
private List<Byte> insertionQuals; private static class SingleBaseInfo {
private List<Byte> deletionQuals; byte baseIndexOrdinal; // enum BaseIndex.ordinal
byte count;
byte qual;
byte insertionQual;
byte deletionQual;
/**
 * Creates the record for a single position; every argument is stored unchanged in the
 * field of the same name.
 *
 * @param baseIndexOrdinal ordinal of the BaseIndex constant for this position
 * @param count            value for the count field
 * @param qual             value for the qual field
 * @param insertionQual    value for the insertionQual field
 * @param deletionQual     value for the deletionQual field
 */
SingleBaseInfo(byte baseIndexOrdinal, byte count, byte qual, byte insertionQual, byte deletionQual) {
this.baseIndexOrdinal = baseIndexOrdinal;
this.count = count;
this.qual = qual;
this.insertionQual = insertionQual;
this.deletionQual = deletionQual;
}
}
/**
 * Common base for the iterators passed to convertVariableGivenBases(). It walks
 * basesCountsQuals and leaves only next() -- the choice of which per-base value to
 * return -- to each subclass.
 */
private abstract class SingleBaseInfoIterator implements Iterator<Byte> {
    // Cursor over the enclosing read's per-base records; subclasses pull from it in next().
    final Iterator<SingleBaseInfo> it = basesCountsQuals.iterator();

    public boolean hasNext() {
        return it.hasNext();
    }

    // Removal through this iterator is not supported.
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
// Map from ordinal to enum value.
private static final BaseIndex[] BaseIndexByOrdinal = new BaseIndex[BaseIndex.values().length];
static
{
for (final BaseIndex baseIndex : BaseIndex.values()) {
BaseIndexByOrdinal[baseIndex.ordinal()] = baseIndex;
}
}
private final List<SingleBaseInfo> basesCountsQuals;
private double mappingQuality; // the average of the rms of the mapping qualities of all the reads that contributed to this consensus private double mappingQuality; // the average of the rms of the mapping qualities of all the reads that contributed to this consensus
private String readTag; private String readTag;
@ -108,11 +151,7 @@ public class SyntheticRead {
*/ */
public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, String readTag, boolean hasIndelQualities, boolean isNegativeRead) { public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, String readTag, boolean hasIndelQualities, boolean isNegativeRead) {
final int initialCapacity = 10000; final int initialCapacity = 10000;
bases = new ArrayList<BaseIndex>(initialCapacity); basesCountsQuals = new ArrayList<SingleBaseInfo>(initialCapacity);
counts = new ArrayList<Byte>(initialCapacity);
quals = new ArrayList<Byte>(initialCapacity);
insertionQuals = new ArrayList<Byte>(initialCapacity);
deletionQuals = new ArrayList<Byte>(initialCapacity);
mappingQuality = 0.0; mappingQuality = 0.0;
this.readTag = readTag; this.readTag = readTag;
@ -127,11 +166,10 @@ public class SyntheticRead {
} }
public SyntheticRead(List<BaseIndex> bases, List<Byte> counts, List<Byte> quals, List<Byte> insertionQuals, List<Byte> deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) { public SyntheticRead(List<BaseIndex> bases, List<Byte> counts, List<Byte> quals, List<Byte> insertionQuals, List<Byte> deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, int refStart, boolean hasIndelQualities, boolean isNegativeRead) {
this.bases = bases; basesCountsQuals = new ArrayList<SingleBaseInfo>(bases.size());
this.counts = counts; for (int i = 0; i < bases.size(); ++i) {
this.quals = quals; basesCountsQuals.add(new SingleBaseInfo(bases.get(i).getOrdinalByte(), counts.get(i), quals.get(i), insertionQuals.get(i), deletionQuals.get(i)));
this.insertionQuals = insertionQuals; }
this.deletionQuals = deletionQuals;
this.mappingQuality = mappingQuality; this.mappingQuality = mappingQuality;
this.readTag = readTag; this.readTag = readTag;
this.header = header; this.header = header;
@ -153,16 +191,12 @@ public class SyntheticRead {
*/ */
@Requires("count <= Byte.MAX_VALUE") @Requires("count <= Byte.MAX_VALUE")
public void add(BaseIndex base, byte count, byte qual, byte insQual, byte delQual, double mappingQuality) { public void add(BaseIndex base, byte count, byte qual, byte insQual, byte delQual, double mappingQuality) {
counts.add(count); basesCountsQuals.add(new SingleBaseInfo(base.getOrdinalByte(), count, qual, insQual, delQual));
bases.add(base);
quals.add(qual);
insertionQuals.add(insQual);
deletionQuals.add(delQual);
this.mappingQuality += mappingQuality; this.mappingQuality += mappingQuality;
} }
public BaseIndex getBase(final int readCoordinate) { public BaseIndex getBase(final int readCoordinate) {
return bases.get(readCoordinate); return BaseIndexByOrdinal[basesCountsQuals.get(readCoordinate).baseIndexOrdinal];
} }
public int getRefStart() { public int getRefStart() {
@ -192,7 +226,7 @@ public class SyntheticRead {
read.setReadName(readName); read.setReadName(readName);
read.setBaseQualities(convertBaseQualities(), EventType.BASE_SUBSTITUTION); read.setBaseQualities(convertBaseQualities(), EventType.BASE_SUBSTITUTION);
read.setReadBases(convertReadBases()); read.setReadBases(convertReadBases());
read.setMappingQuality((int) Math.ceil(mappingQuality / bases.size())); read.setMappingQuality((int) Math.ceil(mappingQuality / basesCountsQuals.size()));
read.setReadGroup(readGroupRecord); read.setReadGroup(readGroupRecord);
read.setAttribute(readTag, convertBaseCounts()); read.setAttribute(readTag, convertBaseCounts());
@ -210,30 +244,46 @@ public class SyntheticRead {
* @return true if it is, false if it isn't. * @return true if it is, false if it isn't.
*/ */
private boolean isAllDeletions() { private boolean isAllDeletions() {
for (BaseIndex b : bases) for (SingleBaseInfo b : basesCountsQuals)
if (b != BaseIndex.D) if (b.baseIndexOrdinal != BaseIndex.D.getOrdinalByte())
return false; return false;
return true; return true;
} }
public int size () { public int size () {
return bases.size(); return basesCountsQuals.size();
} }
private byte [] convertBaseQualities() { private byte [] convertBaseQualities() {
return convertVariableGivenBases(bases, quals); return convertVariableGivenBases(new SingleBaseInfoIterator() {
public Byte next() {
return it.next().qual;
}
});
} }
private byte [] convertInsertionQualities() { private byte [] convertInsertionQualities() {
return convertVariableGivenBases(bases, insertionQuals); return convertVariableGivenBases(new SingleBaseInfoIterator() {
public Byte next() {
return it.next().insertionQual;
}
});
} }
private byte [] convertDeletionQualities() { private byte [] convertDeletionQualities() {
return convertVariableGivenBases(bases, deletionQuals); return convertVariableGivenBases(new SingleBaseInfoIterator() {
public Byte next() {
return it.next().deletionQual;
}
});
} }
protected byte [] convertBaseCounts() { protected byte [] convertBaseCounts() {
byte[] countsArray = convertVariableGivenBases(bases, counts); byte[] countsArray = convertVariableGivenBases(new SingleBaseInfoIterator() {
public Byte next() {
return it.next().count;
}
});
if (countsArray.length == 0) if (countsArray.length == 0)
throw new ReviewedStingException("Reduced read has counts array of length 0"); throw new ReviewedStingException("Reduced read has counts array of length 0");
@ -247,12 +297,14 @@ public class SyntheticRead {
} }
private byte [] convertReadBases() { private byte [] convertReadBases() {
byte [] readArray = new byte[getReadLengthWithNoDeletions(bases)]; byte [] readArray = new byte[getReadLengthWithNoDeletions()];
int i = 0; int i = 0;
for (BaseIndex baseIndex : bases) for (final SingleBaseInfo singleBaseInfo : basesCountsQuals) {
final BaseIndex baseIndex = BaseIndexByOrdinal[singleBaseInfo.baseIndexOrdinal];
if (baseIndex != BaseIndex.D) if (baseIndex != BaseIndex.D)
readArray[i++] = baseIndex.getByte(); readArray[i++] = baseIndex.getByte();
}
return readArray; return readArray;
} }
@ -267,7 +319,8 @@ public class SyntheticRead {
LinkedList<CigarElement> cigarElements = new LinkedList<CigarElement>(); LinkedList<CigarElement> cigarElements = new LinkedList<CigarElement>();
CigarOperator cigarOperator = null; CigarOperator cigarOperator = null;
int length = 0; int length = 0;
for (BaseIndex b : bases) { for (final SingleBaseInfo singleBaseInfo : basesCountsQuals) {
final BaseIndex b = BaseIndexByOrdinal[singleBaseInfo.baseIndexOrdinal];
CigarOperator op; CigarOperator op;
switch (b) { switch (b) {
case D: case D:
@ -303,18 +356,16 @@ public class SyntheticRead {
/** /**
* Shared functionality for all conversion utilities * Shared functionality for all conversion utilities
* *
* @param bases the read bases * @param variableIterator iterator over the values to convert, read once per base
* @param variable the list to convert
* @return a converted variable given the bases and skipping deletions * @return a converted variable given the bases and skipping deletions
*/ */
private static byte [] convertVariableGivenBases (List<BaseIndex> bases, List<Byte> variable) { private byte [] convertVariableGivenBases (Iterator<Byte> variableIterator) {
byte [] variableArray = new byte[getReadLengthWithNoDeletions(bases)]; byte [] variableArray = new byte[getReadLengthWithNoDeletions()];
int i = 0; int i = 0;
Iterator<Byte> variableIterator = variable.iterator(); for (final SingleBaseInfo singleBaseInfo : basesCountsQuals) {
for (BaseIndex baseIndex : bases) {
byte count = variableIterator.next(); byte count = variableIterator.next();
if (baseIndex != BaseIndex.D) if (singleBaseInfo.baseIndexOrdinal != BaseIndex.D.getOrdinalByte())
variableArray[i++] = count; variableArray[i++] = count;
} }
return variableArray; return variableArray;
@ -324,13 +375,12 @@ public class SyntheticRead {
/** /**
* Shared functionality for all conversion utilities * Shared functionality for all conversion utilities
* *
* @param bases the read bases
* @return the length of the read with no deletions * @return the length of the read with no deletions
*/ */
private static int getReadLengthWithNoDeletions(List<BaseIndex> bases) { private int getReadLengthWithNoDeletions() {
int readLength = bases.size(); int readLength = basesCountsQuals.size();
for (BaseIndex baseIndex : bases) for (final SingleBaseInfo singleBaseInfo : basesCountsQuals)
if (baseIndex == BaseIndex.D) if (singleBaseInfo.baseIndexOrdinal == BaseIndex.D.getOrdinalByte())
readLength--; readLength--;
return readLength; return readLength;
} }