From bcfd9ce19a31b441821a88941d40f30cbea03d27 Mon Sep 17 00:00:00 2001 From: Phillip Dexheimer Date: Fri, 31 Oct 2014 22:27:34 -0400 Subject: [PATCH] Moved platform flow information into NGSPlatform * Explicitly added a type for rarely used platforms * PT 81767718 --- .../covariates/CycleCovariate.java | 10 ++--- .../gatk/utils/NGSPlatform.java | 38 ++++++++++++------- .../gatk/utils/SequencerFlowClass.java | 38 +++++++++++++++++++ 3 files changed, 65 insertions(+), 21 deletions(-) create mode 100644 public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequencerFlowClass.java diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java index 627402acb..a76a13e4a 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/engine/recalibration/covariates/CycleCovariate.java @@ -55,11 +55,10 @@ import org.broadinstitute.gatk.engine.recalibration.ReadCovariates; import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection; import org.broadinstitute.gatk.utils.BaseUtils; import org.broadinstitute.gatk.utils.NGSPlatform; +import org.broadinstitute.gatk.utils.SequencerFlowClass; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; -import java.util.EnumSet; - /* * Copyright (c) 2009 The Broad Institute * @@ -104,9 +103,6 @@ public class CycleCovariate implements StandardCovariate { public static final int CUSHION_FOR_INDELS = 4; private String default_platform = null; - private static final EnumSet DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS); - private 
static final EnumSet FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT); - // Initialize any member variables using the command-line arguments passed to the walkers @Override public void initialize(final RecalibrationArgumentCollection RAC) { @@ -126,7 +122,7 @@ public class CycleCovariate implements StandardCovariate { final NGSPlatform ngsPlatform = default_platform == null ? read.getNGSPlatform() : NGSPlatform.fromReadGroupPL(default_platform); // Discrete cycle platforms - if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) { + if (ngsPlatform.getSequencerType() == SequencerFlowClass.DISCRETE) { final int readOrderFactor = read.getReadPairedFlag() && read.getSecondOfPairFlag() ? -1 : 1; final int increment; int cycle; @@ -149,7 +145,7 @@ public class CycleCovariate implements StandardCovariate { } // Flow cycle platforms - else if (FLOW_CYCLE_PLATFORMS.contains(ngsPlatform)) { + else if (ngsPlatform.getSequencerType() == SequencerFlowClass.FLOW) { final byte[] bases = read.getReadBases(); diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/NGSPlatform.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/NGSPlatform.java index f0c40a064..829728835 100644 --- a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/NGSPlatform.java +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/NGSPlatform.java @@ -28,6 +28,7 @@ package org.broadinstitute.gatk.utils; import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; +import java.util.Arrays; import java.util.LinkedList; import java.util.List; @@ -42,38 +43,48 @@ import java.util.List; public enum NGSPlatform { // note the order of elements here determines the order of matching operations, and therefore the // efficiency of getting a NGSPlatform from a string. 
- ILLUMINA("ILLUMINA", "SLX", "SOLEXA"), - SOLID("SOLID"), - LS454("454"), - COMPLETE_GENOMICS("COMPLETE"), - PACBIO("PACBIO"), - ION_TORRENT("IONTORRENT"), - CAPILLARY("CAPILLARY"), - HELICOS("HELICOS"), - UNKNOWN("UNKNOWN"); + ILLUMINA(SequencerFlowClass.DISCRETE, "ILLUMINA", "SLX", "SOLEXA"), + SOLID(SequencerFlowClass.DISCRETE, "SOLID"), + LS454(SequencerFlowClass.FLOW, "454", "LS454"), + COMPLETE_GENOMICS(SequencerFlowClass.DISCRETE, "COMPLETE"), + PACBIO(SequencerFlowClass.DISCRETE, "PACBIO"), + ION_TORRENT(SequencerFlowClass.FLOW, "IONTORRENT"), + CAPILLARY(SequencerFlowClass.OTHER, "CAPILLARY"), + HELICOS(SequencerFlowClass.OTHER, "HELICOS"), + UNKNOWN(SequencerFlowClass.OTHER, "UNKNOWN"); /** * Array of the prefix names in a BAM file for each of the platforms. */ protected final String[] BAM_PL_NAMES; + protected final SequencerFlowClass sequencerType; - NGSPlatform(final String... BAM_PL_NAMES) { + NGSPlatform(final SequencerFlowClass type, final String... BAM_PL_NAMES) { if ( BAM_PL_NAMES.length == 0 ) throw new IllegalStateException("Platforms must have at least one name"); for ( int i = 0; i < BAM_PL_NAMES.length; i++ ) BAM_PL_NAMES[i] = BAM_PL_NAMES[i].toUpperCase(); this.BAM_PL_NAMES = BAM_PL_NAMES; + this.sequencerType = type; } /** * Returns a representative PL string for this platform - * @return + * @return a representative PL string */ public final String getDefaultPlatform() { return BAM_PL_NAMES[0]; } + /** + * The broad "type" of sequencer this platform represents (discrete or flow) + * @return a SequencerFlowClass + */ + public final SequencerFlowClass getSequencerType() { + return sequencerType; + } + /** * Convenience get -- get the NGSPlatform from a GATKSAMRecord. 
* @@ -126,10 +137,9 @@ public enum NGSPlatform { * @return the list of platform names */ public static String knownPlatformsString() { - final List names = new LinkedList(); + final List names = new LinkedList<>(); for ( final NGSPlatform pl : values() ) { - for ( final String name : pl.BAM_PL_NAMES ) - names.add(name); + names.addAll(Arrays.asList(pl.BAM_PL_NAMES)); } return Utils.join(",", names); } diff --git a/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequencerFlowClass.java b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequencerFlowClass.java new file mode 100644 index 000000000..7290292e7 --- /dev/null +++ b/public/gatk-utils/src/main/java/org/broadinstitute/gatk/utils/SequencerFlowClass.java @@ -0,0 +1,38 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.gatk.utils; + +/** + * In broad terms, each sequencing platform can be classified by whether it flows nucleotides in some order + * such that homopolymers get sequenced in a single event (e.g. 454 or Ion) or it reads each position in the + * sequence one at a time, regardless of base composition (e.g. Illumina or Solid). This information is primarily + * useful in the BQSR process. + */ +public enum SequencerFlowClass { + DISCRETE, + FLOW, + OTHER //Catch-all for unknown platforms, as well as relics that GATK doesn't handle well (Capillary, Helicos) +}