Moving reduced read functionality into GATKSAMRecord

-- More functions take / produce GATKSAMRecords instead of SAMRecord
This commit is contained in:
Mark DePristo 2011-10-21 13:28:05 -04:00
parent 1b01e24e23
commit b863390cb1
7 changed files with 67 additions and 36 deletions

View File

@ -393,7 +393,7 @@ public class PairHMMIndelErrorModel {
for (PileupElement p: pileup) { for (PileupElement p: pileup) {
// > 1 when the read is a consensus read representing multiple independent observations // > 1 when the read is a consensus read representing multiple independent observations
final boolean isReduced = ReadUtils.isReducedRead(p.getRead()); final boolean isReduced = p.isReducedRead();
readCounts[readIdx] = isReduced ? p.getReducedCount() : 1; readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location) // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)

View File

@ -4,6 +4,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
/** /**
@ -82,16 +83,16 @@ public class PileupElement {
// -------------------------------------------------------------------------- // --------------------------------------------------------------------------
public boolean isReducedRead() { public boolean isReducedRead() {
return ReadUtils.isReducedRead(getRead()); return ((GATKSAMRecord)read).isReducedRead();
} }
public int getReducedCount() { public int getReducedCount() {
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced count for non-reduced read " + getRead().getReadName()); if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced count for non-reduced read " + getRead().getReadName());
return ReadUtils.getReducedCount(getRead(), offset); return ((GATKSAMRecord)read).getReducedCount(offset);
} }
public byte getReducedQual() { public byte getReducedQual() {
if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced qual for non-reduced read " + getRead().getReadName()); if ( ! isReducedRead() ) throw new IllegalArgumentException("Cannot get reduced qual for non-reduced read " + getRead().getReadName());
return ReadUtils.getReducedQual(getRead(), offset); return getQual();
} }
} }

View File

@ -134,6 +134,7 @@ public class ArtificialSAMUtils {
/** /**
* Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read * Create an artificial read based on the parameters. The cigar string will be *M, where * is the length of the read
* *
*
* @param header the SAM header to associate the read with * @param header the SAM header to associate the read with
* @param name the name of the read * @param name the name of the read
* @param refIndex the reference index, i.e. what chromosome to associate it with * @param refIndex the reference index, i.e. what chromosome to associate it with
@ -142,11 +143,11 @@ public class ArtificialSAMUtils {
* *
* @return the artificial read * @return the artificial read
*/ */
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) { public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) {
if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) || if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) ||
(refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) ) (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) )
throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart); throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart);
SAMRecord record = new GATKSAMRecord(header); GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(name); record.setReadName(name);
record.setReferenceIndex(refIndex); record.setReferenceIndex(refIndex);
record.setAlignmentStart(alignmentStart); record.setAlignmentStart(alignmentStart);
@ -166,6 +167,7 @@ public class ArtificialSAMUtils {
if (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { if (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
record.setReadUnmappedFlag(true); record.setReadUnmappedFlag(true);
} }
return record; return record;
} }
@ -181,16 +183,17 @@ public class ArtificialSAMUtils {
* *
* @return the artificial read * @return the artificial read
*/ */
public static SAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) { public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual ) {
if (bases.length != qual.length) { if (bases.length != qual.length) {
throw new ReviewedStingException("Passed in read string is different length then the quality array"); throw new ReviewedStingException("Passed in read string is different length then the quality array");
} }
SAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length); GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases.length);
rec.setReadBases(bases); rec.setReadBases(bases);
rec.setBaseQualities(qual); rec.setBaseQualities(qual);
if (refIndex == -1) { if (refIndex == -1) {
rec.setReadUnmappedFlag(true); rec.setReadUnmappedFlag(true);
} }
return rec; return rec;
} }

View File

@ -45,9 +45,11 @@ public class GATKSAMRecord extends BAMRecord {
// the SAMRecord data we're caching // the SAMRecord data we're caching
private String mReadString = null; private String mReadString = null;
private GATKSAMReadGroupRecord mReadGroup = null; private GATKSAMReadGroupRecord mReadGroup = null;
private byte[] reducedReadCounts = null;
// because some values can be null, we don't want to duplicate effort // because some values can be null, we don't want to duplicate effort
private boolean retrievedReadGroup = false; private boolean retrievedReadGroup = false;
private boolean retrievedReduceReadCounts = false;
// These temporary attributes were added here to make life easier for // These temporary attributes were added here to make life easier for
// certain algorithms by providing a way to label or attach arbitrary data to // certain algorithms by providing a way to label or attach arbitrary data to
@ -60,8 +62,27 @@ public class GATKSAMRecord extends BAMRecord {
* @param header * @param header
*/ */
public GATKSAMRecord(final SAMFileHeader header) { public GATKSAMRecord(final SAMFileHeader header) {
super(header, SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START, this(new SAMRecord(header));
(short)0, (short)255, 0, 1, 0, 1, 0, 0, 0, null); }
/**
* HACK TO CREATE GATKSAMRECORD BASED ONLY A SAMRECORD FOR TESTING PURPOSES ONLY
* @param read
*/
public GATKSAMRecord(final SAMRecord read) {
super(read.getHeader(), read.getMateReferenceIndex(),
read.getAlignmentStart(),
read.getReadName() != null ? (short)read.getReadNameLength() : 0,
(short)read.getMappingQuality(),
0,
read.getCigarLength(),
read.getFlags(),
read.getReadLength(),
read.getMateReferenceIndex(),
read.getMateAlignmentStart(),
read.getInferredInsertSize(),
new byte[]{});
super.clearAttributes();
} }
public GATKSAMRecord(final SAMFileHeader header, public GATKSAMRecord(final SAMFileHeader header,
@ -121,6 +142,29 @@ public class GATKSAMRecord extends BAMRecord {
retrievedReadGroup = true; retrievedReadGroup = true;
} }
//
//
// Reduced read functions
//
//
public byte[] getReducedReadCounts() {
if ( ! retrievedReduceReadCounts ) {
reducedReadCounts = getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
retrievedReduceReadCounts = true;
}
return reducedReadCounts;
}
public boolean isReducedRead() {
return getReducedReadCounts() != null;
}
public final byte getReducedCount(final int i) {
return getReducedReadCounts()[i];
}
/** /**
* Checks whether an attribute has been set for the given key. * Checks whether an attribute has been set for the given key.
* *

View File

@ -52,24 +52,6 @@ public class ReadUtils {
// ---------------------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------------------
public static final String REDUCED_READ_QUALITY_TAG = "RQ"; public static final String REDUCED_READ_QUALITY_TAG = "RQ";
public static final String REDUCED_READ_CONSENSUS_COUNTS_TAG = "CC";
public final static byte[] getReducedReadQualityTagValue(final SAMRecord read) {
// TODO -- warning of performance problem. Should be cached in GATKSAMRecord
return read.getByteArrayAttribute(ReadUtils.REDUCED_READ_QUALITY_TAG);
}
public final static boolean isReducedRead(final SAMRecord read) {
return getReducedReadQualityTagValue(read) != null;
}
public final static byte getReducedQual(final SAMRecord read, final int i) {
return read.getBaseQualities()[i];
}
public final static byte getReducedCount(final SAMRecord read, final int i) {
return getReducedReadQualityTagValue(read)[i];
}
// ---------------------------------------------------------------------------------------------------- // ----------------------------------------------------------------------------------------------------
// //

View File

@ -5,6 +5,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.BeforeTest; import org.testng.annotations.BeforeTest;
@ -12,7 +13,7 @@ import org.testng.annotations.Test;
public class ReadUtilsUnitTest extends BaseTest { public class ReadUtilsUnitTest extends BaseTest {
SAMRecord read, reducedRead; GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG"; final static String BASES = "ACTG";
final static String QUALS = "!+5?"; final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40}; final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40};
@ -47,13 +48,12 @@ public class ReadUtilsUnitTest extends BaseTest {
@Test @Test
public void testReducedReads() { public void testReducedReads() {
Assert.assertFalse(ReadUtils.isReducedRead(read), "isReducedRead is false for normal read"); Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
Assert.assertEquals(ReadUtils.getReducedReadQualityTagValue(read), null, "No reduced read tag in normal read"); Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
Assert.assertTrue(ReadUtils.isReducedRead(reducedRead), "isReducedRead is true for reduced read"); Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
for ( int i = 0; i < reducedRead.getReadLength(); i++) { for ( int i = 0; i < reducedRead.getReadLength(); i++) {
Assert.assertEquals(ReadUtils.getReducedQual(reducedRead, i), read.getBaseQualities()[i], "Reduced read quality not set to the expected value at " + i); Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
Assert.assertEquals(ReadUtils.getReducedCount(reducedRead, i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
} }
} }

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.utils; package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
@ -28,7 +29,7 @@ public class ReservoirDownsamplerUnitTest {
@Test @Test
public void testOneElementWithPoolSizeOne() { public void testOneElementWithPoolSizeOne() {
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1); ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
downsampler.addAll(reads); downsampler.addAll(reads);
@ -40,7 +41,7 @@ public class ReservoirDownsamplerUnitTest {
@Test @Test
public void testOneElementWithPoolSizeGreaterThanOne() { public void testOneElementWithPoolSizeGreaterThanOne() {
List<SAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76)); List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5); ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
downsampler.addAll(reads); downsampler.addAll(reads);