The GATK engine now ensures that incoming GATKSAMRecords have GATKSAMReadGroupRecord objects in their header

-- Update SAMDataSource so that the merged header contains GATKSAMReadGroupRecord
-- Getting the NGSPlatform for a GATKSAMRecord is now efficient: the platform is resolved once and cached on the read group record, instead of being recomputed from the PL string over and over
-- Updated a few places in the code where the input argument is actually a GATKSAMRecord, not a SAMRecord, so that the signatures declare GATKSAMRecord for type safety
This commit is contained in:
Mark DePristo 2012-12-18 15:46:34 -05:00
parent c8cd6ac465
commit f6d5499582
9 changed files with 88 additions and 88 deletions

View File

@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.SimpleTimer;
import org.broadinstitute.sting.utils.baq.ReadTransformingIterator; import org.broadinstitute.sting.utils.baq.ReadTransformingIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File; import java.io.File;
@ -894,9 +895,11 @@ public class SAMDataSource {
long lastTick = timer.currentTime(); long lastTick = timer.currentTime();
for(final SAMReaderID readerID: readerIDs) { for(final SAMReaderID readerID: readerIDs) {
final ReaderInitializer init = new ReaderInitializer(readerID).call(); final ReaderInitializer init = new ReaderInitializer(readerID).call();
if (removeProgramRecords) { if (removeProgramRecords) {
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>()); init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
} }
if (threadAllocation.getNumIOThreads() > 0) { if (threadAllocation.getNumIOThreads() > 0) {
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
} }
@ -916,6 +919,13 @@ public class SAMDataSource {
for(SAMFileReader reader: readers.values()) for(SAMFileReader reader: readers.values())
headers.add(reader.getFileHeader()); headers.add(reader.getFileHeader());
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true); headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true);
// update all read groups to GATKSAMReadGroupRecords
final List<SAMReadGroupRecord> gatkReadGroups = new LinkedList<SAMReadGroupRecord>();
for ( final SAMReadGroupRecord rg : headerMerger.getMergedHeader().getReadGroups() ) {
gatkReadGroups.add(new GATKSAMReadGroupRecord(rg));
}
headerMerger.getMergedHeader().setReadGroups(gatkReadGroups);
} }
final private void printReaderPerformance(final int nExecutedTotal, final private void printReaderPerformance(final int nExecutedTotal,

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.filters; package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
/** /**
@ -37,6 +38,6 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
public class Platform454Filter extends ReadFilter { public class Platform454Filter extends ReadFilter {
public boolean filterOut(SAMRecord rec) { public boolean filterOut(SAMRecord rec) {
return (ReadUtils.is454Read(rec)); return (ReadUtils.is454Read((GATKSAMRecord)rec));
} }
} }

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
/** /**
@ -41,7 +42,7 @@ public class PlatformFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) { public boolean filterOut(SAMRecord rec) {
for ( String name : PLFilterNames ) for ( String name : PLFilterNames )
if ( ReadUtils.isPlatformRead(rec, name.toUpperCase() )) if ( ReadUtils.isPlatformRead((GATKSAMRecord)rec, name.toUpperCase() ))
return true; return true;
return false; return false;
} }

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Allele;
@ -421,7 +422,7 @@ public class HaplotypeIndelErrorModel {
double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()]; double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()];
double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()]; double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()];
int i=0; int i=0;
for (SAMRecord read : pileup.getReads()) { for (GATKSAMRecord read : pileup.getReads()) {
if(ReadUtils.is454Read(read)) { if(ReadUtils.is454Read(read)) {
continue; continue;
} }

View File

@ -529,7 +529,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
sawReadInCurrentInterval = false; sawReadInCurrentInterval = false;
} }
private boolean doNotTryToClean(SAMRecord read) { private boolean doNotTryToClean(GATKSAMRecord read) {
return read.getReadUnmappedFlag() || return read.getReadUnmappedFlag() ||
read.getNotPrimaryAlignmentFlag() || read.getNotPrimaryAlignmentFlag() ||
read.getReadFailsVendorQualityCheckFlag() || read.getReadFailsVendorQualityCheckFlag() ||
@ -835,7 +835,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// TODO -- get rid of this try block when Picard does the right thing for reads aligned off the end of the reference // TODO -- get rid of this try block when Picard does the right thing for reads aligned off the end of the reference
try { try {
if ( read.getAttribute(SAMTag.NM.name()) != null ) if ( read.getAttribute(SAMTag.NM.name()) != null )
read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex-1)); read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex - 1));
if ( read.getAttribute(SAMTag.UQ.name()) != null ) if ( read.getAttribute(SAMTag.UQ.name()) != null )
read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, reference, leftmostIndex-1)); read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, reference, leftmostIndex-1));
} catch (Exception e) { } catch (Exception e) {

View File

@ -24,8 +24,7 @@
package org.broadinstitute.sting.utils; package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import net.sf.samtools.SAMRecord;
/** /**
* A canonical, master list of the standard NGS platforms. These values * A canonical, master list of the standard NGS platforms. These values
@ -64,25 +63,15 @@ public enum NGSPlatform {
} }
/** /**
* Convenience constructor -- calculates the NGSPlatfrom from a SAMRecord. * Convenience get -- get the NGSPlatfrom from a SAMRecord.
* Note you should not use this function if you have a GATKSAMRecord -- use the
* accessor method instead.
* *
* @param read * Just gets the platform from the GATKReadGroupRecord associated with this read.
*
* @param read a GATKSAMRecord
* @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
*/ */
public static final NGSPlatform fromRead(SAMRecord read) { public static NGSPlatform fromRead(GATKSAMRecord read) {
return fromReadGroup(read.getReadGroup()); return read.getReadGroup().getNGSPlatform();
}
/**
* Returns the NGSPlatform corresponding to the PL tag in the read group
* @param rg
* @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
*/
public static final NGSPlatform fromReadGroup(SAMReadGroupRecord rg) {
if ( rg == null ) return UNKNOWN;
return fromReadGroupPL(rg.getPlatform());
} }
/** /**
@ -90,7 +79,7 @@ public enum NGSPlatform {
* @param plFromRG -- the PL field (or equivalent) in a ReadGroup object * @param plFromRG -- the PL field (or equivalent) in a ReadGroup object
* @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match * @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match
*/ */
public static final NGSPlatform fromReadGroupPL(final String plFromRG) { public static NGSPlatform fromReadGroupPL(final String plFromRG) {
if ( plFromRG == null ) return UNKNOWN; if ( plFromRG == null ) return UNKNOWN;
// todo -- algorithm could be implemented more efficiently, as the list of all // todo -- algorithm could be implemented more efficiently, as the list of all
@ -113,7 +102,7 @@ public enum NGSPlatform {
* @param platform the read group string that describes the platform used * @param platform the read group string that describes the platform used
* @return true if the platform is known (i.e. it's in the list and is not UNKNOWN) * @return true if the platform is known (i.e. it's in the list and is not UNKNOWN)
*/ */
public static final boolean isKnown (final String platform) { public static final boolean isKnown(final String platform) {
return fromReadGroupPL(platform) != UNKNOWN; return fromReadGroupPL(platform) != UNKNOWN;
} }
} }

View File

@ -12,9 +12,6 @@ import org.broadinstitute.sting.utils.NGSPlatform;
* *
*/ */
public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
public static final String LANE_TAG = "LN";
// the SAMReadGroupRecord data we're caching // the SAMReadGroupRecord data we're caching
private String mSample = null; private String mSample = null;
private String mPlatform = null; private String mPlatform = null;
@ -33,46 +30,14 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
super(record.getReadGroupId(), record); super(record.getReadGroupId(), record);
} }
public GATKSAMReadGroupRecord(SAMReadGroupRecord record, NGSPlatform pl) { /**
super(record.getReadGroupId(), record); * Get the NGSPlatform enum telling us the platform of this read group
setPlatform(pl.getDefaultPlatform()); *
mNGSPlatform = pl; * This function call is caching, so subsequent calls to it are free, while
retrievedPlatform = retrievedNGSPlatform = true; * the first time it's called there's a bit of work to resolve the enum
} *
* @return an NGSPlatform enum value
/////////////////////////////////////////////////////////////////////////////// */
// *** The following methods are overloaded to cache the appropriate data ***//
///////////////////////////////////////////////////////////////////////////////
public String getSample() {
if ( !retrievedSample ) {
mSample = super.getSample();
retrievedSample = true;
}
return mSample;
}
public void setSample(String s) {
super.setSample(s);
mSample = s;
retrievedSample = true;
}
public String getPlatform() {
if ( !retrievedPlatform ) {
mPlatform = super.getPlatform();
retrievedPlatform = true;
}
return mPlatform;
}
public void setPlatform(String s) {
super.setPlatform(s);
mPlatform = s;
retrievedPlatform = true;
retrievedNGSPlatform = false; // recalculate the NGSPlatform
}
public NGSPlatform getNGSPlatform() { public NGSPlatform getNGSPlatform() {
if ( ! retrievedNGSPlatform ) { if ( ! retrievedNGSPlatform ) {
mNGSPlatform = NGSPlatform.fromReadGroupPL(getPlatform()); mNGSPlatform = NGSPlatform.fromReadGroupPL(getPlatform());
@ -82,11 +47,40 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
return mNGSPlatform; return mNGSPlatform;
} }
public String getLane() { ///////////////////////////////////////////////////////////////////////////////
return this.getAttribute(LANE_TAG); // *** The following methods are overloaded to cache the appropriate data ***//
///////////////////////////////////////////////////////////////////////////////
@Override
public String getSample() {
if ( !retrievedSample ) {
mSample = super.getSample();
retrievedSample = true;
}
return mSample;
} }
public void setLane(String lane) { @Override
this.setAttribute(LANE_TAG, lane); public void setSample(String s) {
super.setSample(s);
mSample = s;
retrievedSample = true;
}
@Override
public String getPlatform() {
if ( !retrievedPlatform ) {
mPlatform = super.getPlatform();
retrievedPlatform = true;
}
return mPlatform;
}
@Override
public void setPlatform(String s) {
super.setPlatform(s);
mPlatform = s;
retrievedPlatform = true;
retrievedNGSPlatform = false; // recalculate the NGSPlatform
} }
} }

View File

@ -25,9 +25,9 @@
package org.broadinstitute.sting.utils.sam; package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*; import net.sf.samtools.*;
import org.broadinstitute.sting.utils.recalibration.EventType;
import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.recalibration.EventType;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
@ -141,16 +141,26 @@ public class GATKSAMRecord extends BAMRecord {
mReadString = s; mReadString = s;
} }
/**
* Get the GATKSAMReadGroupRecord of this read
* @return a non-null GATKSAMReadGroupRecord
*/
@Override @Override
public GATKSAMReadGroupRecord getReadGroup() { public GATKSAMReadGroupRecord getReadGroup() {
if ( !retrievedReadGroup ) { if ( ! retrievedReadGroup ) {
SAMReadGroupRecord tempReadGroup = super.getReadGroup(); mReadGroup = (GATKSAMReadGroupRecord)super.getReadGroup();
mReadGroup = (tempReadGroup == null ? null : new GATKSAMReadGroupRecord(tempReadGroup));
retrievedReadGroup = true; retrievedReadGroup = true;
} }
return mReadGroup; return mReadGroup;
} }
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
mReadGroup = readGroup;
retrievedReadGroup = true;
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
}
@Override @Override
public int hashCode() { public int hashCode() {
return super.hashCode(); return super.hashCode();
@ -259,12 +269,6 @@ public class GATKSAMRecord extends BAMRecord {
return getReadGroup().getNGSPlatform(); return getReadGroup().getNGSPlatform();
} }
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
mReadGroup = readGroup;
retrievedReadGroup = true;
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
}
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// *** ReduceReads functions ***// // *** ReduceReads functions ***//
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////

View File

@ -226,7 +226,7 @@ public class ReadUtils {
* @param read the read to test * @param read the read to test
* @return checks the read group tag PL for the default 454 tag * @return checks the read group tag PL for the default 454 tag
*/ */
public static boolean is454Read(SAMRecord read) { public static boolean is454Read(GATKSAMRecord read) {
return NGSPlatform.fromRead(read) == NGSPlatform.LS454; return NGSPlatform.fromRead(read) == NGSPlatform.LS454;
} }
@ -236,7 +236,7 @@ public class ReadUtils {
* @param read the read to test * @param read the read to test
* @return checks the read group tag PL for the default ion tag * @return checks the read group tag PL for the default ion tag
*/ */
public static boolean isIonRead(SAMRecord read) { public static boolean isIonRead(GATKSAMRecord read) {
return NGSPlatform.fromRead(read) == NGSPlatform.ION_TORRENT; return NGSPlatform.fromRead(read) == NGSPlatform.ION_TORRENT;
} }
@ -246,7 +246,7 @@ public class ReadUtils {
* @param read the read to test * @param read the read to test
* @return checks the read group tag PL for the default SOLiD tag * @return checks the read group tag PL for the default SOLiD tag
*/ */
public static boolean isSOLiDRead(SAMRecord read) { public static boolean isSOLiDRead(GATKSAMRecord read) {
return NGSPlatform.fromRead(read) == NGSPlatform.SOLID; return NGSPlatform.fromRead(read) == NGSPlatform.SOLID;
} }
@ -256,7 +256,7 @@ public class ReadUtils {
* @param read the read to test * @param read the read to test
* @return checks the read group tag PL for the default SLX tag * @return checks the read group tag PL for the default SLX tag
*/ */
public static boolean isIlluminaRead(SAMRecord read) { public static boolean isIlluminaRead(GATKSAMRecord read) {
return NGSPlatform.fromRead(read) == NGSPlatform.ILLUMINA; return NGSPlatform.fromRead(read) == NGSPlatform.ILLUMINA;
} }
@ -268,7 +268,7 @@ public class ReadUtils {
* @param name the upper-cased platform name to test * @param name the upper-cased platform name to test
* @return whether or not name == PL tag in the read group of read * @return whether or not name == PL tag in the read group of read
*/ */
public static boolean isPlatformRead(SAMRecord read, String name) { public static boolean isPlatformRead(GATKSAMRecord read, String name) {
SAMReadGroupRecord readGroup = read.getReadGroup(); SAMReadGroupRecord readGroup = read.getReadGroup();
if (readGroup != null) { if (readGroup != null) {