Moving reduced read functionality into GATKSAMRecord

-- More functions take / produce GATKSAMRecords instead of SAMRecord
This commit is contained in:
Mark DePristo 2011-10-21 13:28:05 -04:00
parent 1b01e24e23
commit b863390cb1
7 changed files with 67 additions and 36 deletions

View File

@ -393,7 +393,7 @@ public class PairHMMIndelErrorModel {
for (PileupElement p: pileup) {
// > 1 when the read is a consensus read representing multiple independent observations
final boolean isReduced = ReadUtils.isReducedRead(p.getRead());
final boolean isReduced = p.isReducedRead();
readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
/**
@ -82,16 +83,16 @@ public class PileupElement {
// --------------------------------------------------------------------------
public boolean isReducedRead() {
return ReadUtils.isReducedRead(getRead());
return ((GATKSAMRecord)read).isReducedRead();
}
public int getReducedCount() {
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced count for non-reduced read " + getRead().getReadName());
return ReadUtils.getReducedCount(getRead(), offset);
return ((GATKSAMRecord)read).getReducedCount(offset);
}
public byte getReducedQual() {
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced qual for non-reduced read " + getRead().getReadName());
return ReadUtils.getReducedQual(getRead(), offset);
return getQual();
}
}

View File

@ -134,6 +134,7 @@ public class ArtificialSAMUtils {
/**
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
*
*
* @param header the SAM header to associate the read with
* @param name the name of the read
* @param refIndex the reference index, i.e. what chromosome to associate it with
@ -142,11 +143,11 @@ public class ArtificialSAMUtils {
*
* @return the artificial read
*/
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) {
if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) ||
(refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) )
throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart);
SAMRecord record = new GATKSAMRecord(header);
GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(name);
record.setReferenceIndex(refIndex);
record.setAlignmentStart(alignmentStart);
@ -166,6 +167,7 @@ public class ArtificialSAMUtils {
if (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
record.setReadUnmappedFlag(true);
}
return record;
}
@ -181,16 +183,17 @@ public class ArtificialSAMUtils {
*
* @return the artificial read
*/
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) {
public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) {
if (bases.length != qual.length) {
throw new ReviewedStingException("Passed in read string is different length then the quality array");
}
SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
rec.setReadBases(bases);
rec.setBaseQualities(qual);
if (refIndex == -1) {
rec.setReadUnmappedFlag(true);
}
return rec;
}

View File

@ -45,9 +45,11 @@ public class GATKSAMRecord extends BAMRecord {
// the SAMRecord data we're caching
private String mReadString = null;
private GATKSAMReadGroupRecord mReadGroup = null;
private byte[] reducedReadCounts = null;
// because some values can be null, we don't want to duplicate effort
private boolean retrievedReadGroup = false;
private boolean retrievedReduceReadCounts = false;
// These temporary attributes were added here to make life easier for
// certain algorithms by providing a way to label or attach arbitrary data to
@ -60,8 +62,27 @@ public class GATKSAMRecord extends BAMRecord {
* @param header
*/
public GATKSAMRecord(final SAMFileHeader header) {
super(header, SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START,
(short)0, (short)255, 0, 1, 0, 1, 0, 0, 0, null);
this(new SAMRecord(header));
}
/**
* HACK TO CREATE GATKSAMRECORD BASED ONLY A SAMRECORD FOR TESTING PURPOSES ONLY
* @param read
*/
public GATKSAMRecord(final SAMRecord read) {
super(read.getHeader(), read.getMateReferenceIndex(),
read.getAlignmentStart(),
read.getReadName() != null ? (short)read.getReadNameLength() : 0,
(short)read.getMappingQuality(),
0,
read.getCigarLength(),
read.getFlags(),
read.getReadLength(),
read.getMateReferenceIndex(),
read.getMateAlignmentStart(),
read.getInferredInsertSize(),
new byte[]{});
super.clearAttributes();
}
public GATKSAMRecord(final SAMFileHeader header,
@ -121,6 +142,29 @@ public class GATKSAMRecord extends BAMRecord {
retrievedReadGroup = true;
}
//
//
// Reduced read functions
//
//
public byte[] getReducedReadCounts() {
if ( ! retrievedReduceReadCounts ) {
reducedReadCounts = getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
retrievedReduceReadCounts = true;
}
return reducedReadCounts;
}
public boolean isReducedRead() {
return getReducedReadCounts() != null;
}
public final byte getReducedCount(final int i) {
return getReducedReadCounts()[i];
}
/**
* Checks whether an attribute has been set for the given key.
*

View File

@ -52,24 +52,6 @@ public class ReadUtils {
// ----------------------------------------------------------------------------------------------------
public static final String REDUCED_READ_QUALITY_TAG = "RQ";
public static final String REDUCED_READ_CONSENSUS_COUNTS_TAG = "CC";
public final static byte[] getReducedReadQualityTagValue(final SAMRecord read) {
// TODO -- warning of performance problem. Should be cached in GATKSAMRecord
return read.getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
}
public final static boolean isReducedRead(final SAMRecord read) {
return getReducedReadQualityTagValue(read) != null;
}
public final static byte getReducedQual(final SAMRecord read, final int i) {
return read.getBaseQualities()[i];
}
public final static byte getReducedCount(final SAMRecord read, final int i) {
return getReducedReadQualityTagValue(read)[i];
}
// ----------------------------------------------------------------------------------------------------
//

View File

@ -5,6 +5,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
@ -12,7 +13,7 @@ import org.testng.annotations.Test;
public class ReadUtilsUnitTest extends BaseTest {
SAMRecord read, reducedRead;
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
@ -47,13 +48,12 @@ public class ReadUtilsUnitTest extends BaseTest {
@Test
public void testReducedReads() {
Assert.assertFalse(ReadUtils.isReducedRead(read), "isReducedRead is false for normal read");
Assert.assertEquals(ReadUtils.getReducedReadQualityTagValue(read), null, "No reduced read tag in normal read");
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
Assert.assertTrue(ReadUtils.isReducedRead(reducedRead), "isReducedRead is true for reduced read");
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
for ( int i = 0; i < reducedRead.getReadLength(); i++) {
Assert.assertEquals(ReadUtils.getReducedQual(reducedRead, i), read.getBaseQualities()[i], "Reduced read quality not set to the expected value at " + i);
Assert.assertEquals(ReadUtils.getReducedCount(reducedRead, i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
}
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
@ -28,7 +29,7 @@ public class ReservoirDownsamplerUnitTest {
@Test
public void testOneElementWithPoolSizeOne() {
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
downsampler.addAll(reads);
@ -40,7 +41,7 @@ public class ReservoirDownsamplerUnitTest {
@Test
public void testOneElementWithPoolSizeGreaterThanOne() {
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
downsampler.addAll(reads);