laying groundwork to have insertions and deletions going through the system.

2012-02-07 18:11:53 -05:00 · 2012-02-07 18:11:53 -05:00 · e89887cd8e
parent 0d3ea0401c
commit e89887cd8e
2 changed files with 163 additions and 136 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
 import org.broadinstitute.sting.utils.sam.AlignmentUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;

 import java.util.ArrayList;
 import java.util.List;
@ -284,7 +285,7 @@ public class RecalDataManager {
    public static void parseColorSpace(final GATKSAMRecord read) {

        // If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
-        if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
+        if (ReadUtils.isSOLiDRead(read)) {
            if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
                final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
                if (attr != null) {
@ -382,7 +383,7 @@ public class RecalDataManager {
    }

    public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
-        if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
+        if (ReadUtils.isSOLiDRead(read)) {
            final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
            if (attr != null) {
                byte[] colorSpace;
@ -611,21 +612,17 @@ public class RecalDataManager {
        final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
        final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];

-        // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
-        for (int i = 0; i < numRequestedCovariates; i++) {
+        for (int i = 0; i < numRequestedCovariates; i++) {                              // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
            requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
-            for (int j = 0; j < readLength; j++) {
-                //copy values into a 2D array that allows all covar types to be extracted at once for
-                //an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
-                covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];
-            }
+            for (int j = 0; j < readLength; j++)
+                covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];    // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
        }

        return covariateValues_offset_x_covar;
    }

    /**
-     * Perform a ceratin transversion (A <-> C or G <-> T) on the base.
+     * Perform a certain transversion (A <-> C or G <-> T) on the base.
     *
     * @param base the base [AaCcGgTt]
     * @return the transversion of the base, or the input base if it's not one of the understood ones
--- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils;

 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;

-
 /**
 * BaseUtils contains some basic utilities for manipulating nucleotides.
 */
@ -29,21 +28,25 @@ public class BaseUtils {

        byte b;
        int index;
+
        private Base(char base, int index) {
            this.b = (byte) base;
            this.index = index;
        }

        public byte getBase() { return b; }
+
        public char getBaseAsChar() { return (char) b; }
+
        public int getIndex() { return index; }

        public boolean sameBase(byte o) { return b == o; }
+
        public boolean sameBase(char o) { return b == (byte) o; }
+
        public boolean sameBase(int i) { return index == i; }
    }

-
    // todo -- fix me (enums?)
    public static final byte DELETION_INDEX = 4;
    public static final byte NO_CALL_INDEX = 5; // (this is 'N')
@ -53,7 +56,6 @@ public class BaseUtils {
    public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
    public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');

-
    /// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
    // a pyrimidine to another pyrimidine nucleotide (C <-> T).
    // Approximately two out of every three single nucleotide polymorphisms (SNPs) are transitions.
@ -64,6 +66,7 @@ public class BaseUtils {

    /**
     * Returns the base substitution type of the 2 state SNP
+     *
     * @param base1
     * @param base2
     * @return
@ -85,7 +88,9 @@ public class BaseUtils {
        return !isTransition(base1, base2);
    }

-    /** Private constructor.  No instantiating this class! */
+    /**
+     * Private constructor.  No instantiating this class!
+     */
    private BaseUtils() {}

    static public boolean basesAreEqual(byte base1, byte base2) {
@ -96,7 +101,6 @@ public class BaseUtils {
        return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
    }

-
    /**
     * Converts a IUPAC nucleotide code to a pair of bases
     *
@ -170,22 +174,26 @@ public class BaseUtils {
        switch (base) {
            case '*':               // the wildcard character counts as an A
            case 'A':
-            case 'a': return 0;
+            case 'a':
+                return 0;

            case 'C':
-            case 'c': return 1;
+            case 'c':
+                return 1;

            case 'G':
-            case 'g': return 2;
+            case 'g':
+                return 2;

            case 'T':
-            case 't': return 3;
+            case 't':
+                return 3;

-            default: return -1;
+            default:
+                return -1;
        }
    }

-
    /**
     * Converts a simple base to a base index
     *
@ -197,29 +205,37 @@ public class BaseUtils {
        switch (base) {
            case '*':               // the wildcard character counts as an A
            case 'A':
-            case 'a': return 0;
+            case 'a':
+                return 0;

            case 'C':
-            case 'c': return 1;
+            case 'c':
+                return 1;

            case 'G':
-            case 'g': return 2;
+            case 'g':
+                return 2;

            case 'T':
-            case 't': return 3;
+            case 't':
+                return 3;

-            default: return -1;
+            default:
+                return -1;
        }
    }

    static public int extendedBaseToBaseIndex(byte base) {
        switch (base) {
            case 'd':
-            case 'D': return DELETION_INDEX;
+            case 'D':
+                return DELETION_INDEX;
            case 'n':
-            case 'N': return NO_CALL_INDEX;
+            case 'N':
+                return NO_CALL_INDEX;

-            default: return simpleBaseToBaseIndex(base);
+            default:
+                return simpleBaseToBaseIndex(base);
        }
    }

@ -232,11 +248,6 @@ public class BaseUtils {
        return simpleBaseToBaseIndex(base) != -1;
    }

-    @Deprecated
-    static public boolean isNBase(char base) {
-        return isNBase((byte)base);
-    }
-
    static public boolean isNBase(byte base) {
        return base == 'N' || base == 'n';
    }
@ -249,11 +260,16 @@ public class BaseUtils {
     */
    static public byte baseIndexToSimpleBase(int baseIndex) {
        switch (baseIndex) {
-            case 0: return 'A';
-            case 1: return 'C';
-            case 2: return 'G';
-            case 3: return 'T';
-            default: return '.';
+            case 0:
+                return 'A';
+            case 1:
+                return 'C';
+            case 2:
+                return 'G';
+            case 3:
+                return 'T';
+            default:
+                return '.';
        }
    }

@ -270,11 +286,16 @@ public class BaseUtils {
     */
    static public int crossTalkPartnerIndex(int baseIndex) {
        switch (baseIndex) {
-            case 0: return 1; // A -> C
-            case 1: return 0; // C -> A
-            case 2: return 3; // G -> T
-            case 3: return 2; // T -> G
-            default: return -1;
+            case 0:
+                return 1; // A -> C
+            case 1:
+                return 0; // C -> A
+            case 2:
+                return 3; // G -> T
+            case 3:
+                return 2; // T -> G
+            default:
+                return -1;
        }
    }

@ -297,11 +318,16 @@ public class BaseUtils {
     */
    static public byte complementIndex(int baseIndex) {
        switch (baseIndex) {
-            case 0: return 3; // a -> t
-            case 1: return 2; // c -> g
-            case 2: return 1; // g -> c
-            case 3: return 0; // t -> a
-            default: return -1; // wtf?
+            case 0:
+                return 3; // a -> t
+            case 1:
+                return 2; // c -> g
+            case 2:
+                return 1; // g -> c
+            case 3:
+                return 0; // t -> a
+            default:
+                return -1; // wtf?
        }
    }

@ -314,14 +340,19 @@ public class BaseUtils {
    static public byte simpleComplement(byte base) {
        switch (base) {
            case 'A':
-            case 'a': return 'T';
+            case 'a':
+                return 'T';
            case 'C':
-            case 'c': return 'G';
+            case 'c':
+                return 'G';
            case 'G':
-            case 'g': return 'C';
+            case 'g':
+                return 'C';
            case 'T':
-            case 't': return 'A';
-            default: return base;
+            case 't':
+                return 'A';
+            default:
+                return base;
        }
    }

@ -407,7 +438,6 @@ public class BaseUtils {
        return new String(simpleReverseComplement(bases.getBytes()));
    }

-
    /**
     * Complement a String of bases.  Preserves ambiguous bases.
     *
@ -532,8 +562,8 @@ public class BaseUtils {
        return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
    }

-    
-    /** Computes the smallest period >= minPeriod for the specified string. The period is defined as such p, 
+    /**
+     * Computes the smallest period >= minPeriod for the specified string. The period is defined as such p,
     * that for all  i = 0... seq.length-1,  seq[ i % p ] = seq[i] (or equivalently seq[i] = seq[i+p] for i=0...seq.length-1-p).
     * The sequence does <i>not</i> have to contain whole number of periods. For instance, "ACACACAC" has a period
     * of 2 (it has a period of 4 as well), and so does
@ -554,9 +584,7 @@ public class BaseUtils {
            int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
            // if our current hypothesis holds, base[pos] must be the same as base[offset]

-    		if ( Character.toUpperCase( seq[pos] ) !=
-    				Character.toUpperCase( seq[offset] )
-    			) {
+            if (Character.toUpperCase(seq[pos]) != Character.toUpperCase(seq[offset])) {

                // period we have been trying so far does not work.
                // two possibilities:
@ -569,8 +597,10 @@ public class BaseUtils {
                //     we set period to pos  (thus trying the hypothesis that bases from start up to the current one,
                //     non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
                // on the next loop re-entrance after pos is autoincremented)
-    			if ( offset == 0 ) period = pos+1;
-    			else period = pos-- ;
+                if (offset == 0)
+                    period = pos + 1;
+                else
+                    period = pos--;

            }
        }