The GATK engine now ensures that incoming GATKSAMRecords have GATKSAMReadGroupRecord objects in their header
-- Update SAMDataSource so that the merged header contains GATKSAMReadGroupRecord objects -- Getting the NGSPlatform for a GATKSAMRecord is now efficient, instead of recomputing the NGS platform over and over from the PL string -- Updated a few places in the code where the input argument is actually a GATKSAMRecord, not a SAMRecord, to declare it as a GATKSAMRecord for type safety
This commit is contained in:
parent
c8cd6ac465
commit
f6d5499582
|
|
@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.SimpleTimer;
|
|||
import org.broadinstitute.sting.utils.baq.ReadTransformingIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -894,9 +895,11 @@ public class SAMDataSource {
|
|||
long lastTick = timer.currentTime();
|
||||
for(final SAMReaderID readerID: readerIDs) {
|
||||
final ReaderInitializer init = new ReaderInitializer(readerID).call();
|
||||
|
||||
if (removeProgramRecords) {
|
||||
init.reader.getFileHeader().setProgramRecords(new ArrayList<SAMProgramRecord>());
|
||||
}
|
||||
|
||||
if (threadAllocation.getNumIOThreads() > 0) {
|
||||
inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
|
||||
}
|
||||
|
|
@ -916,6 +919,13 @@ public class SAMDataSource {
|
|||
for(SAMFileReader reader: readers.values())
|
||||
headers.add(reader.getFileHeader());
|
||||
headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true);
|
||||
|
||||
// update all read groups to GATKSAMReadGroupRecords
|
||||
final List<SAMReadGroupRecord> gatkReadGroups = new LinkedList<SAMReadGroupRecord>();
|
||||
for ( final SAMReadGroupRecord rg : headerMerger.getMergedHeader().getReadGroups() ) {
|
||||
gatkReadGroups.add(new GATKSAMReadGroupRecord(rg));
|
||||
}
|
||||
headerMerger.getMergedHeader().setReadGroups(gatkReadGroups);
|
||||
}
|
||||
|
||||
final private void printReaderPerformance(final int nExecutedTotal,
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
/**
|
||||
|
|
@ -37,6 +38,6 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
|||
|
||||
public class Platform454Filter extends ReadFilter {
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
return (ReadUtils.is454Read(rec));
|
||||
return (ReadUtils.is454Read((GATKSAMRecord)rec));
|
||||
}
|
||||
}
|
||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.filters;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
/**
|
||||
|
|
@ -41,7 +42,7 @@ public class PlatformFilter extends ReadFilter {
|
|||
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
for ( String name : PLFilterNames )
|
||||
if ( ReadUtils.isPlatformRead(rec, name.toUpperCase() ))
|
||||
if ( ReadUtils.isPlatformRead((GATKSAMRecord)rec, name.toUpperCase() ))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.Haplotype;
|
|||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
||||
|
|
@ -421,7 +422,7 @@ public class HaplotypeIndelErrorModel {
|
|||
double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()];
|
||||
double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()];
|
||||
int i=0;
|
||||
for (SAMRecord read : pileup.getReads()) {
|
||||
for (GATKSAMRecord read : pileup.getReads()) {
|
||||
if(ReadUtils.is454Read(read)) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -529,7 +529,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
sawReadInCurrentInterval = false;
|
||||
}
|
||||
|
||||
private boolean doNotTryToClean(SAMRecord read) {
|
||||
private boolean doNotTryToClean(GATKSAMRecord read) {
|
||||
return read.getReadUnmappedFlag() ||
|
||||
read.getNotPrimaryAlignmentFlag() ||
|
||||
read.getReadFailsVendorQualityCheckFlag() ||
|
||||
|
|
@ -835,7 +835,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
// TODO -- get rid of this try block when Picard does the right thing for reads aligned off the end of the reference
|
||||
try {
|
||||
if ( read.getAttribute(SAMTag.NM.name()) != null )
|
||||
read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex-1));
|
||||
read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex - 1));
|
||||
if ( read.getAttribute(SAMTag.UQ.name()) != null )
|
||||
read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, reference, leftmostIndex-1));
|
||||
} catch (Exception e) {
|
||||
|
|
|
|||
|
|
@ -24,8 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* A canonical, master list of the standard NGS platforms. These values
|
||||
|
|
@ -64,25 +63,15 @@ public enum NGSPlatform {
|
|||
}
|
||||
|
||||
/**
|
||||
* Convenience constructor -- calculates the NGSPlatfrom from a SAMRecord.
|
||||
* Note you should not use this function if you have a GATKSAMRecord -- use the
|
||||
* accessor method instead.
|
||||
* Convenience getter -- gets the NGSPlatform from a GATKSAMRecord.
|
||||
*
|
||||
* @param read
|
||||
* Just gets the platform from the GATKReadGroupRecord associated with this read.
|
||||
*
|
||||
* @param read a GATKSAMRecord
|
||||
* @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match
|
||||
*/
|
||||
public static final NGSPlatform fromRead(SAMRecord read) {
|
||||
return fromReadGroup(read.getReadGroup());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the NGSPlatform corresponding to the PL tag in the read group
|
||||
* @param rg
|
||||
* @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match
|
||||
*/
|
||||
public static final NGSPlatform fromReadGroup(SAMReadGroupRecord rg) {
|
||||
if ( rg == null ) return UNKNOWN;
|
||||
return fromReadGroupPL(rg.getPlatform());
|
||||
public static NGSPlatform fromRead(GATKSAMRecord read) {
|
||||
return read.getReadGroup().getNGSPlatform();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -90,7 +79,7 @@ public enum NGSPlatform {
|
|||
* @param plFromRG -- the PL field (or equivalent) in a ReadGroup object
|
||||
* @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match
|
||||
*/
|
||||
public static final NGSPlatform fromReadGroupPL(final String plFromRG) {
|
||||
public static NGSPlatform fromReadGroupPL(final String plFromRG) {
|
||||
if ( plFromRG == null ) return UNKNOWN;
|
||||
|
||||
// todo -- algorithm could be implemented more efficiently, as the list of all
|
||||
|
|
@ -113,7 +102,7 @@ public enum NGSPlatform {
|
|||
* @param platform the read group string that describes the platform used
|
||||
* @return true if the platform is known (i.e. it's in the list and is not UNKNOWN)
|
||||
*/
|
||||
public static final boolean isKnown (final String platform) {
|
||||
public static final boolean isKnown(final String platform) {
|
||||
return fromReadGroupPL(platform) != UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,9 +12,6 @@ import org.broadinstitute.sting.utils.NGSPlatform;
|
|||
*
|
||||
*/
|
||||
public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
|
||||
|
||||
public static final String LANE_TAG = "LN";
|
||||
|
||||
// the SAMReadGroupRecord data we're caching
|
||||
private String mSample = null;
|
||||
private String mPlatform = null;
|
||||
|
|
@ -33,46 +30,14 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
|
|||
super(record.getReadGroupId(), record);
|
||||
}
|
||||
|
||||
public GATKSAMReadGroupRecord(SAMReadGroupRecord record, NGSPlatform pl) {
|
||||
super(record.getReadGroupId(), record);
|
||||
setPlatform(pl.getDefaultPlatform());
|
||||
mNGSPlatform = pl;
|
||||
retrievedPlatform = retrievedNGSPlatform = true;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// *** The following methods are overloaded to cache the appropriate data ***//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
public String getSample() {
|
||||
if ( !retrievedSample ) {
|
||||
mSample = super.getSample();
|
||||
retrievedSample = true;
|
||||
}
|
||||
return mSample;
|
||||
}
|
||||
|
||||
public void setSample(String s) {
|
||||
super.setSample(s);
|
||||
mSample = s;
|
||||
retrievedSample = true;
|
||||
}
|
||||
|
||||
public String getPlatform() {
|
||||
if ( !retrievedPlatform ) {
|
||||
mPlatform = super.getPlatform();
|
||||
retrievedPlatform = true;
|
||||
}
|
||||
return mPlatform;
|
||||
}
|
||||
|
||||
public void setPlatform(String s) {
|
||||
super.setPlatform(s);
|
||||
mPlatform = s;
|
||||
retrievedPlatform = true;
|
||||
retrievedNGSPlatform = false; // recalculate the NGSPlatform
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the NGSPlatform enum telling us the platform of this read group
|
||||
*
|
||||
* This function call is caching, so subsequent calls to it are free, while
|
||||
* the first time it's called there's a bit of work to resolve the enum
|
||||
*
|
||||
* @return an NGSPlatform enum value
|
||||
*/
|
||||
public NGSPlatform getNGSPlatform() {
|
||||
if ( ! retrievedNGSPlatform ) {
|
||||
mNGSPlatform = NGSPlatform.fromReadGroupPL(getPlatform());
|
||||
|
|
@ -82,11 +47,40 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord {
|
|||
return mNGSPlatform;
|
||||
}
|
||||
|
||||
public String getLane() {
|
||||
return this.getAttribute(LANE_TAG);
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// *** The following methods are overloaded to cache the appropriate data ***//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
@Override
|
||||
public String getSample() {
|
||||
if ( !retrievedSample ) {
|
||||
mSample = super.getSample();
|
||||
retrievedSample = true;
|
||||
}
|
||||
return mSample;
|
||||
}
|
||||
|
||||
public void setLane(String lane) {
|
||||
this.setAttribute(LANE_TAG, lane);
|
||||
|
||||
@Override
|
||||
public void setSample(String s) {
|
||||
super.setSample(s);
|
||||
mSample = s;
|
||||
retrievedSample = true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getPlatform() {
|
||||
if ( !retrievedPlatform ) {
|
||||
mPlatform = super.getPlatform();
|
||||
retrievedPlatform = true;
|
||||
}
|
||||
return mPlatform;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setPlatform(String s) {
|
||||
super.setPlatform(s);
|
||||
mPlatform = s;
|
||||
retrievedPlatform = true;
|
||||
retrievedNGSPlatform = false; // recalculate the NGSPlatform
|
||||
}
|
||||
}
|
||||
|
|
@ -25,9 +25,9 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.recalibration.EventType;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
|
|
@ -141,16 +141,26 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
mReadString = s;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the GATKSAMReadGroupRecord of this read
|
||||
* @return a non-null GATKSAMReadGroupRecord
|
||||
*/
|
||||
@Override
|
||||
public GATKSAMReadGroupRecord getReadGroup() {
|
||||
if ( !retrievedReadGroup ) {
|
||||
SAMReadGroupRecord tempReadGroup = super.getReadGroup();
|
||||
mReadGroup = (tempReadGroup == null ? null : new GATKSAMReadGroupRecord(tempReadGroup));
|
||||
if ( ! retrievedReadGroup ) {
|
||||
mReadGroup = (GATKSAMReadGroupRecord)super.getReadGroup();
|
||||
retrievedReadGroup = true;
|
||||
}
|
||||
return mReadGroup;
|
||||
}
|
||||
|
||||
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
|
||||
mReadGroup = readGroup;
|
||||
retrievedReadGroup = true;
|
||||
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return super.hashCode();
|
||||
|
|
@ -259,12 +269,6 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
return getReadGroup().getNGSPlatform();
|
||||
}
|
||||
|
||||
public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) {
|
||||
mReadGroup = readGroup;
|
||||
retrievedReadGroup = true;
|
||||
setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils!
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// *** ReduceReads functions ***//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
|
|
|||
|
|
@ -226,7 +226,7 @@ public class ReadUtils {
|
|||
* @param read the read to test
|
||||
* @return checks the read group tag PL for the default 454 tag
|
||||
*/
|
||||
public static boolean is454Read(SAMRecord read) {
|
||||
public static boolean is454Read(GATKSAMRecord read) {
|
||||
return NGSPlatform.fromRead(read) == NGSPlatform.LS454;
|
||||
}
|
||||
|
||||
|
|
@ -236,7 +236,7 @@ public class ReadUtils {
|
|||
* @param read the read to test
|
||||
* @return checks the read group tag PL for the default ion tag
|
||||
*/
|
||||
public static boolean isIonRead(SAMRecord read) {
|
||||
public static boolean isIonRead(GATKSAMRecord read) {
|
||||
return NGSPlatform.fromRead(read) == NGSPlatform.ION_TORRENT;
|
||||
}
|
||||
|
||||
|
|
@ -246,7 +246,7 @@ public class ReadUtils {
|
|||
* @param read the read to test
|
||||
* @return checks the read group tag PL for the default SOLiD tag
|
||||
*/
|
||||
public static boolean isSOLiDRead(SAMRecord read) {
|
||||
public static boolean isSOLiDRead(GATKSAMRecord read) {
|
||||
return NGSPlatform.fromRead(read) == NGSPlatform.SOLID;
|
||||
}
|
||||
|
||||
|
|
@ -256,7 +256,7 @@ public class ReadUtils {
|
|||
* @param read the read to test
|
||||
* @return checks the read group tag PL for the default SLX tag
|
||||
*/
|
||||
public static boolean isIlluminaRead(SAMRecord read) {
|
||||
public static boolean isIlluminaRead(GATKSAMRecord read) {
|
||||
return NGSPlatform.fromRead(read) == NGSPlatform.ILLUMINA;
|
||||
}
|
||||
|
||||
|
|
@ -268,7 +268,7 @@ public class ReadUtils {
|
|||
* @param name the upper-cased platform name to test
|
||||
* @return whether or not name == PL tag in the read group of read
|
||||
*/
|
||||
public static boolean isPlatformRead(SAMRecord read, String name) {
|
||||
public static boolean isPlatformRead(GATKSAMRecord read, String name) {
|
||||
|
||||
SAMReadGroupRecord readGroup = read.getReadGroup();
|
||||
if (readGroup != null) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue