From f6d5499582e707e9a86f7c654e340f1c474485c1 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 18 Dec 2012 15:46:34 -0500 Subject: [PATCH] The GATK engine now ensures that incoming GATKSAMRecords have GATKSAMReadGroupRecord objects in their header -- Update SAMDataSource so that the merged header contains GATKSAMReadGroupRecord -- Now getting the NGSPlatform for a GATKSAMRecord is actually efficient, instead of computing the NGS platform over and over from the PL string -- Updated a few places in the code where the input argument is actually a GATKSAMRecord, not a SAMRecord for type safety --- .../gatk/datasources/reads/SAMDataSource.java | 10 +++ .../sting/gatk/filters/Platform454Filter.java | 3 +- .../sting/gatk/filters/PlatformFilter.java | 3 +- .../indels/HaplotypeIndelErrorModel.java | 3 +- .../gatk/walkers/indels/IndelRealigner.java | 4 +- .../sting/utils/NGSPlatform.java | 29 ++---- .../utils/sam/GATKSAMReadGroupRecord.java | 90 +++++++++---------- .../sting/utils/sam/GATKSAMRecord.java | 24 ++--- .../sting/utils/sam/ReadUtils.java | 10 +-- 9 files changed, 88 insertions(+), 88 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index e99814278..e3f197716 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -44,6 +44,7 @@ import org.broadinstitute.sting.utils.SimpleTimer; import org.broadinstitute.sting.utils.baq.ReadTransformingIterator; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; import java.io.File; @@ -894,9 +895,11 @@ public class SAMDataSource { long 
lastTick = timer.currentTime(); for(final SAMReaderID readerID: readerIDs) { final ReaderInitializer init = new ReaderInitializer(readerID).call(); + if (removeProgramRecords) { init.reader.getFileHeader().setProgramRecords(new ArrayList()); } + if (threadAllocation.getNumIOThreads() > 0) { inputStreams.put(init.readerID, init.blockInputStream); // get from initializer } @@ -916,6 +919,13 @@ public class SAMDataSource { for(SAMFileReader reader: readers.values()) headers.add(reader.getFileHeader()); headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true); + + // replace every read group in the merged header with a GATKSAMReadGroupRecord + final List gatkReadGroups = new LinkedList(); + for ( final SAMReadGroupRecord rg : headerMerger.getMergedHeader().getReadGroups() ) { + gatkReadGroups.add(new GATKSAMReadGroupRecord(rg)); + } + headerMerger.getMergedHeader().setReadGroups(gatkReadGroups); } final private void printReaderPerformance(final int nExecutedTotal, diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java index 8ad91ac1c..a1f2a877b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/Platform454Filter.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; /** @@ -37,6 +38,6 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; public class Platform454Filter extends ReadFilter { public boolean filterOut(SAMRecord rec) { - return (ReadUtils.is454Read(rec)); + return (ReadUtils.is454Read((GATKSAMRecord)rec)); } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java 
b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java index 8e241bb2c..de5be94bc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.filters; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; /** @@ -41,7 +42,7 @@ public class PlatformFilter extends ReadFilter { public boolean filterOut(SAMRecord rec) { for ( String name : PLFilterNames ) - if ( ReadUtils.isPlatformRead(rec, name.toUpperCase() )) + if ( ReadUtils.isPlatformRead((GATKSAMRecord)rec, name.toUpperCase() )) return true; return false; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java index 795fc76ed..3c1bc338a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.variant.variantcontext.Allele; @@ -421,7 +422,7 @@ public class HaplotypeIndelErrorModel { double[][] haplotypeLikehoodMatrix = new double[haplotypesInVC.size()][haplotypesInVC.size()]; double readLikelihoods[][] = new double[pileup.getReads().size()][haplotypesInVC.size()]; int i=0; - for (SAMRecord read : pileup.getReads()) { + for (GATKSAMRecord read : 
pileup.getReads()) { if(ReadUtils.is454Read(read)) { continue; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 5d19ac5e8..15d3f43fd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -529,7 +529,7 @@ public class IndelRealigner extends ReadWalker { sawReadInCurrentInterval = false; } - private boolean doNotTryToClean(SAMRecord read) { + private boolean doNotTryToClean(GATKSAMRecord read) { return read.getReadUnmappedFlag() || read.getNotPrimaryAlignmentFlag() || read.getReadFailsVendorQualityCheckFlag() || @@ -835,7 +835,7 @@ public class IndelRealigner extends ReadWalker { // TODO -- get rid of this try block when Picard does the right thing for reads aligned off the end of the reference try { if ( read.getAttribute(SAMTag.NM.name()) != null ) - read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex-1)); + read.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(read, reference, leftmostIndex - 1)); if ( read.getAttribute(SAMTag.UQ.name()) != null ) read.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(read, reference, leftmostIndex-1)); } catch (Exception e) { diff --git a/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java index 504704e55..847d8067c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java +++ b/public/java/src/org/broadinstitute/sting/utils/NGSPlatform.java @@ -24,8 +24,7 @@ package org.broadinstitute.sting.utils; -import net.sf.samtools.SAMReadGroupRecord; -import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; /** * A canonical, master list of the standard NGS platforms. 
These values @@ -64,25 +63,15 @@ public enum NGSPlatform { } /** - * Convenience constructor -- calculates the NGSPlatfrom from a SAMRecord. - * Note you should not use this function if you have a GATKSAMRecord -- use the - * accessor method instead. + * Convenience get -- get the NGSPlatform from a GATKSAMRecord. * - * @param read + * Just gets the platform from the GATKSAMReadGroupRecord associated with this read. + * + * @param read a GATKSAMRecord * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match */ - public static final NGSPlatform fromRead(SAMRecord read) { - return fromReadGroup(read.getReadGroup()); - } - - /** - * Returns the NGSPlatform corresponding to the PL tag in the read group - * @param rg - * @return an NGSPlatform object matching the PL field of the header, of UNKNOWN if there was no match - */ - public static final NGSPlatform fromReadGroup(SAMReadGroupRecord rg) { - if ( rg == null ) return UNKNOWN; - return fromReadGroupPL(rg.getPlatform()); + public static NGSPlatform fromRead(GATKSAMRecord read) { + return read.getReadGroup().getNGSPlatform(); } /** @@ -90,7 +79,7 @@ public enum NGSPlatform { * @param plFromRG -- the PL field (or equivalent) in a ReadGroup object * @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match */ - public static final NGSPlatform fromReadGroupPL(final String plFromRG) { + public static NGSPlatform fromReadGroupPL(final String plFromRG) { if ( plFromRG == null ) return UNKNOWN; // todo -- algorithm could be implemented more efficiently, as the list of all @@ -113,7 +102,7 @@ public enum NGSPlatform { * @param platform the read group string that describes the platform used * @return true if the platform is known (i.e. 
it's in the list and is not UNKNOWN) */ - public static final boolean isKnown (final String platform) { + public static final boolean isKnown(final String platform) { return fromReadGroupPL(platform) != UNKNOWN; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java index bb99156fe..5f70ced92 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMReadGroupRecord.java @@ -12,9 +12,6 @@ import org.broadinstitute.sting.utils.NGSPlatform; * */ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { - - public static final String LANE_TAG = "LN"; - // the SAMReadGroupRecord data we're caching private String mSample = null; private String mPlatform = null; @@ -33,46 +30,14 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { super(record.getReadGroupId(), record); } - public GATKSAMReadGroupRecord(SAMReadGroupRecord record, NGSPlatform pl) { - super(record.getReadGroupId(), record); - setPlatform(pl.getDefaultPlatform()); - mNGSPlatform = pl; - retrievedPlatform = retrievedNGSPlatform = true; - } - - /////////////////////////////////////////////////////////////////////////////// - // *** The following methods are overloaded to cache the appropriate data ***// - /////////////////////////////////////////////////////////////////////////////// - - public String getSample() { - if ( !retrievedSample ) { - mSample = super.getSample(); - retrievedSample = true; - } - return mSample; - } - - public void setSample(String s) { - super.setSample(s); - mSample = s; - retrievedSample = true; - } - - public String getPlatform() { - if ( !retrievedPlatform ) { - mPlatform = super.getPlatform(); - retrievedPlatform = true; - } - return mPlatform; - } - - public void setPlatform(String s) { - super.setPlatform(s); - mPlatform = s; - 
retrievedPlatform = true; - retrievedNGSPlatform = false; // recalculate the NGSPlatform - } - + /** + * Get the NGSPlatform enum telling us the platform of this read group + * + * This function call is caching, so subsequent calls to it are free, while + * the first time it's called there's a bit of work to resolve the enum + * + * @return an NGSPlatform enum value + */ public NGSPlatform getNGSPlatform() { if ( ! retrievedNGSPlatform ) { mNGSPlatform = NGSPlatform.fromReadGroupPL(getPlatform()); @@ -82,11 +47,40 @@ public class GATKSAMReadGroupRecord extends SAMReadGroupRecord { return mNGSPlatform; } - public String getLane() { - return this.getAttribute(LANE_TAG); + /////////////////////////////////////////////////////////////////////////////// + // *** The following methods are overloaded to cache the appropriate data ***// + /////////////////////////////////////////////////////////////////////////////// + + @Override + public String getSample() { + if ( !retrievedSample ) { + mSample = super.getSample(); + retrievedSample = true; + } + return mSample; } - - public void setLane(String lane) { - this.setAttribute(LANE_TAG, lane); + + @Override + public void setSample(String s) { + super.setSample(s); + mSample = s; + retrievedSample = true; + } + + @Override + public String getPlatform() { + if ( !retrievedPlatform ) { + mPlatform = super.getPlatform(); + retrievedPlatform = true; + } + return mPlatform; + } + + @Override + public void setPlatform(String s) { + super.setPlatform(s); + mPlatform = s; + retrievedPlatform = true; + retrievedNGSPlatform = false; // recalculate the NGSPlatform } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 9fdb48b34..a83aca8f3 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ 
-25,9 +25,9 @@ package org.broadinstitute.sting.utils.sam; import net.sf.samtools.*; -import org.broadinstitute.sting.utils.recalibration.EventType; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.recalibration.EventType; import java.util.Arrays; import java.util.HashMap; @@ -141,16 +141,26 @@ public class GATKSAMRecord extends BAMRecord { mReadString = s; } + /** + * Get the GATKSAMReadGroupRecord of this read + * @return a non-null GATKSAMReadGroupRecord + */ @Override public GATKSAMReadGroupRecord getReadGroup() { - if ( !retrievedReadGroup ) { - SAMReadGroupRecord tempReadGroup = super.getReadGroup(); - mReadGroup = (tempReadGroup == null ? null : new GATKSAMReadGroupRecord(tempReadGroup)); + if ( ! retrievedReadGroup ) { + mReadGroup = (GATKSAMReadGroupRecord)super.getReadGroup(); retrievedReadGroup = true; } return mReadGroup; } + public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) { + mReadGroup = readGroup; + retrievedReadGroup = true; + setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils! + } + + @Override public int hashCode() { return super.hashCode(); @@ -259,12 +269,6 @@ public class GATKSAMRecord extends BAMRecord { return getReadGroup().getNGSPlatform(); } - public void setReadGroup( final GATKSAMReadGroupRecord readGroup ) { - mReadGroup = readGroup; - retrievedReadGroup = true; - setAttribute("RG", mReadGroup.getId()); // todo -- this should be standardized, but we don't have access to SAMTagUtils! 
- } - /////////////////////////////////////////////////////////////////////////////// // *** ReduceReads functions ***// /////////////////////////////////////////////////////////////////////////////// diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index bd908727f..263cd9bd1 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -226,7 +226,7 @@ public class ReadUtils { * @param read the read to test * @return checks the read group tag PL for the default 454 tag */ - public static boolean is454Read(SAMRecord read) { + public static boolean is454Read(GATKSAMRecord read) { return NGSPlatform.fromRead(read) == NGSPlatform.LS454; } @@ -236,7 +236,7 @@ public class ReadUtils { * @param read the read to test * @return checks the read group tag PL for the default ion tag */ - public static boolean isIonRead(SAMRecord read) { + public static boolean isIonRead(GATKSAMRecord read) { return NGSPlatform.fromRead(read) == NGSPlatform.ION_TORRENT; } @@ -246,7 +246,7 @@ public class ReadUtils { * @param read the read to test * @return checks the read group tag PL for the default SOLiD tag */ - public static boolean isSOLiDRead(SAMRecord read) { + public static boolean isSOLiDRead(GATKSAMRecord read) { return NGSPlatform.fromRead(read) == NGSPlatform.SOLID; } @@ -256,7 +256,7 @@ public class ReadUtils { * @param read the read to test * @return checks the read group tag PL for the default SLX tag */ - public static boolean isIlluminaRead(SAMRecord read) { + public static boolean isIlluminaRead(GATKSAMRecord read) { return NGSPlatform.fromRead(read) == NGSPlatform.ILLUMINA; } @@ -268,7 +268,7 @@ public class ReadUtils { * @param name the upper-cased platform name to test * @return whether or not name == PL tag in the read group of read */ - public static boolean isPlatformRead(SAMRecord 
read, String name) { + public static boolean isPlatformRead(GATKSAMRecord read, String name) { SAMReadGroupRecord readGroup = read.getReadGroup(); if (readGroup != null) {