The GATK engine now ensures that incoming GATKSAMRecords have GATKSAMReadGroupRecord objects in their header

-- Update SAMDataSource so that the merged header contains GATKSAMReadGroupRecord
-- Now getting the NGSPlatform for a GATKSAMRecord is actually efficient, instead of computing the NGS platform over and over from the PL string
-- Updated a few places in the code where the input argument is actually a GATKSAMRecord, not a SAMRecord, to improve type safety
This commit is contained in:
Mark DePristo 2012-12-18 15:46:34 -05:00
parent c8cd6ac465
commit f6d5499582
9 changed files with 88 additions and 88 deletions

View File

@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.baq.ReadTransformingIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File;
@ -894,9 +895,11 @@ public class SAMDataSource {
long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
}
if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
}
@ -916,6 +919,13 @@ public class SAMDataSource {
for(SAMFileReader reader: readers.values())
headers.add(reader.getFileHeader());
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true);
// update all read groups to GATKSAMReadGroupRecords
final List<SAMReadGroupRecord> gatkReadGroups = new LinkedList<SAMReadGroupRecord>();
for ( final SAMReadGroupRecord rg : headerMerger.getMergedHeader().getReadGroups() ) {
gatkReadGroups.add(new GATKSAMReadGroupRecord(rg));
}
headerMerger.getMergedHeader().setReadGroups(gatkReadGroups);
}
final private void printReaderPerformance(final int nExecutedTotal,

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
/**
@ -37,6 +38,6 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
public class Platform454Filter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (ReadUtils.is454Read(rec));
return (ReadUtils.is454Read((GATKSAMRecord)rec));
}
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
/**
@ -41,7 +42,7 @@ public class PlatformFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
for ( String name : PLFilterNames )
if ( ReadUtils.isPlatformRead(rec, name.toUpperCase() ))
if ( ReadUtils.isPlatformRead((GATKSAMRecord)rec, name.toUpperCase() ))
return true;
return false;
}

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.variant.variantcontext.Allele;
@ -421,7 +422,7 @@ public class HaplotypeIndelErrorModel {
double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()];
double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()];
int i=0;
for (SAMRecord read : pileup.getReads()) {
for (GATKSAMRecord read : pileup.getReads()) {
if(ReadUtils.is454Read(read)) {
continue;
}

View File

@ -529,7 +529,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
sawReadInCurrentInterval = false;
}
private boolean doNotTryToClean(SAMRecord read) {
private boolean doNotTryToClean(GATKSAMRecord read) {
return read.getReadUnmappedFlag() ||
read.getNotPrimaryAlignmentFlag() ||
read.getReadFailsVendorQualityCheckFlag() ||
@ -835,7 +835,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// TODO -- get rid of this try block when Picard does the right thing for reads aligned off the end of the reference
try {
if ( read.getAttribute(SAMTag.NM.name()) != null )
read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex-1));
read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex - 1));
if ( read.getAttribute(SAMTag.UQ.name()) != null )
read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, reference, leftmostIndex-1));
} catch (Exception e) {

View File

@ -24,8 +24,7 @@
package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* A canonical, master list of the standard NGS platforms. These values
@ -64,25 +63,15 @@ public enum NGSPlatform {
}
/**
* Convenience constructor -- calculates the NGSPlatfrom from a SAMRecord.
* Note you should not use this function if you have a GATKSAMRecord -- use the
* accessor method instead.
* Convenience get -- get the NGSPlatform from a GATKSAMRecord.
*
* @param read
* Just gets the platform from the GATKReadGroupRecord associated with this read.
*
* @param read a GATKSAMRecord
* @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match
*/
public static final NGSPlatform fromRead(SAMRecord read) {
return fromReadGroup(read.getReadGroup());
}
/**
* Returns the NGSPlatform corresponding to the PL tag in the read group
* @param rg
* @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
*/
public static final NGSPlatform fromReadGroup(SAMReadGroupRecord rg) {
if ( rg == null ) return UNKNOWN;
return fromReadGroupPL(rg.getPlatform());
public static NGSPlatform fromRead(GATKSAMRecord read) {
return read.getReadGroup().getNGSPlatform();
}
/**
@ -90,7 +79,7 @@ public enum NGSPlatform {
* @param plFromRG -- the PL field (or equivalent) in a ReadGroup object
* @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match
*/
public static final NGSPlatform fromReadGroupPL(final String plFromRG) {
public static NGSPlatform fromReadGroupPL(final String plFromRG) {
if ( plFromRG == null ) return UNKNOWN;
// todo -- algorithm could be implemented more efficiently, as the list of all
@ -113,7 +102,7 @@ public enum NGSPlatform {
* @param platform the read group string that describes the platform used
* @return true if the platform is known (i.e. it's in the list and is not UNKNOWN)
*/
public static final boolean isKnown (final String platform) {
public static final boolean isKnown(final String platform) {
return fromReadGroupPL(platform) != UNKNOWN;
}
}

View File

@ -12,9 +12,6 @@ import org.broadinstitute.sting.utils.NGSPlatform;
*
*/
public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
public static final String LANE_TAG = "LN";
// the SAMReadGroupRecord data we're caching
private String mSample = null;
private String mPlatform = null;
@ -33,46 +30,14 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
super(record.getReadGroupId(), record);
}
public GATKSAMReadGroupRecord(SAMReadGroupRecord record, NGSPlatform pl) {
super(record.getReadGroupId(), record);
setPlatform(pl.getDefaultPlatform());
mNGSPlatform = pl;
retrievedPlatform = retrievedNGSPlatform = true;
}
///////////////////////////////////////////////////////////////////////////////
// *** The following methods are overloaded to cache the appropriate data ***//
///////////////////////////////////////////////////////////////////////////////
public String getSample() {
if ( !retrievedSample ) {
mSample = super.getSample();
retrievedSample = true;
}
return mSample;
}
public void setSample(String s) {
super.setSample(s);
mSample = s;
retrievedSample = true;
}
public String getPlatform() {
if ( !retrievedPlatform ) {
mPlatform = super.getPlatform();
retrievedPlatform = true;
}
return mPlatform;
}
public void setPlatform(String s) {
super.setPlatform(s);
mPlatform = s;
retrievedPlatform = true;
retrievedNGSPlatform = false; // recalculate the NGSPlatform
}
/**
* Get the NGSPlatform enum telling us the platform of this read group
*
* This function call is caching, so subsequent calls to it are free, while
* the first time it's called there's a bit of work to resolve the enum
*
* @return an NGSPlatform enum value
*/
public NGSPlatform getNGSPlatform() {
if ( ! retrievedNGSPlatform ) {
mNGSPlatform = NGSPlatform.fromReadGroupPL(getPlatform());
@ -82,11 +47,40 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
return mNGSPlatform;
}
public String getLane() {
return this.getAttribute(LANE_TAG);
///////////////////////////////////////////////////////////////////////////////
// *** The following methods are overridden to cache the appropriate data ***//
///////////////////////////////////////////////////////////////////////////////
@Override
public String getSample() {
if ( !retrievedSample ) {
mSample = super.getSample();
retrievedSample = true;
}
return mSample;
}
public void setLane(String lane) {
this.setAttribute(LANE_TAG, lane);
@Override
public void setSample(String s) {
super.setSample(s);
mSample = s;
retrievedSample = true;
}
@Override
public String getPlatform() {
if ( !retrievedPlatform ) {
mPlatform = super.getPlatform();
retrievedPlatform = true;
}
return mPlatform;
}
@Override
public void setPlatform(String s) {
super.setPlatform(s);
mPlatform = s;
retrievedPlatform = true;
retrievedNGSPlatform = false; // recalculate the NGSPlatform
}
}

View File

@ -25,9 +25,9 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.EventType;
import java.util.Arrays;
import java.util.HashMap;
@ -141,16 +141,26 @@ public class GATKSAMRecord extends BAMRecord {
mReadString = s;
}
/**
* Get the GATKSAMReadGroupRecord of this read
* @return a non-null GATKSAMReadGroupRecord
*/
@Override
public GATKSAMReadGroupRecord getReadGroup() {
if ( !retrievedReadGroup ) {
SAMReadGroupRecord tempReadGroup = super.getReadGroup();
mReadGroup = (tempReadGroup == null ? null : new GATKSAMReadGroupRecord(tempReadGroup));
if ( ! retrievedReadGroup ) {
mReadGroup = (GATKSAMReadGroupRecord)super.getReadGroup();
retrievedReadGroup = true;
}
return mReadGroup;
}
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
mReadGroup = readGroup;
retrievedReadGroup = true;
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
}
@Override
public int hashCode() {
return super.hashCode();
@ -259,12 +269,6 @@ public class GATKSAMRecord extends BAMRecord {
return getReadGroup().getNGSPlatform();
}
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
mReadGroup = readGroup;
retrievedReadGroup = true;
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
}
///////////////////////////////////////////////////////////////////////////////
// *** ReduceReads functions ***//
///////////////////////////////////////////////////////////////////////////////

View File

@ -226,7 +226,7 @@ public class ReadUtils {
* @param read the read to test
* @return checks the read group tag PL for the default 454 tag
*/
public static boolean is454Read(SAMRecord read) {
public static boolean is454Read(GATKSAMRecord read) {
return NGSPlatform.fromRead(read) == NGSPlatform.LS454;
}
@ -236,7 +236,7 @@ public class ReadUtils {
* @param read the read to test
* @return checks the read group tag PL for the default ion tag
*/
public static boolean isIonRead(SAMRecord read) {
public static boolean isIonRead(GATKSAMRecord read) {
return NGSPlatform.fromRead(read) == NGSPlatform.ION_TORRENT;
}
@ -246,7 +246,7 @@ public class ReadUtils {
* @param read the read to test
* @return checks the read group tag PL for the default SOLiD tag
*/
public static boolean isSOLiDRead(SAMRecord read) {
public static boolean isSOLiDRead(GATKSAMRecord read) {
return NGSPlatform.fromRead(read) == NGSPlatform.SOLID;
}
@ -256,7 +256,7 @@ public class ReadUtils {
* @param read the read to test
* @return checks the read group tag PL for the default SLX tag
*/
public static boolean isIlluminaRead(SAMRecord read) {
public static boolean isIlluminaRead(GATKSAMRecord read) {
return NGSPlatform.fromRead(read) == NGSPlatform.ILLUMINA;
}
@ -268,7 +268,7 @@ public class ReadUtils {
* @param name the upper-cased platform name to test
* @return whether or not name == PL tag in the read group of read
*/
public static boolean isPlatformRead(SAMRecord read, String name) {
public static boolean isPlatformRead(GATKSAMRecord read, String name) {
SAMReadGroupRecord readGroup = read.getReadGroup();
if (readGroup != null) {