laying groundwork to have insertions and deletions going through the system.
This commit is contained in:
parent
0d3ea0401c
commit
e89887cd8e
|
|
@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -284,7 +285,7 @@ public class RecalDataManager {
|
||||||
public static void parseColorSpace(final GATKSAMRecord read) {
|
public static void parseColorSpace(final GATKSAMRecord read) {
|
||||||
|
|
||||||
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||||
if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
|
if (ReadUtils.isSOLiDRead(read)) {
|
||||||
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
|
||||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
|
|
@ -382,7 +383,7 @@ public class RecalDataManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
|
public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
|
||||||
if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
|
if (ReadUtils.isSOLiDRead(read)) {
|
||||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
byte[] colorSpace;
|
byte[] colorSpace;
|
||||||
|
|
@ -611,21 +612,17 @@ public class RecalDataManager {
|
||||||
final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
|
final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
|
||||||
final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
|
final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
|
||||||
|
|
||||||
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
for (int i = 0; i < numRequestedCovariates; i++) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||||
for (int i = 0; i < numRequestedCovariates; i++) {
|
|
||||||
requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
|
requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
|
||||||
for (int j = 0; j < readLength; j++) {
|
for (int j = 0; j < readLength; j++)
|
||||||
//copy values into a 2D array that allows all covar types to be extracted at once for
|
covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j]; // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
||||||
//an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
|
||||||
covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return covariateValues_offset_x_covar;
|
return covariateValues_offset_x_covar;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Perform a ceratin transversion (A <-> C or G <-> T) on the base.
|
* Perform a certain transversion (A <-> C or G <-> T) on the base.
|
||||||
*
|
*
|
||||||
* @param base the base [AaCcGgTt]
|
* @param base the base [AaCcGgTt]
|
||||||
* @return the transversion of the base, or the input base if it's not one of the understood ones
|
* @return the transversion of the base, or the input base if it's not one of the understood ones
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* BaseUtils contains some basic utilities for manipulating nucleotides.
|
* BaseUtils contains some basic utilities for manipulating nucleotides.
|
||||||
*/
|
*/
|
||||||
|
|
@ -29,21 +28,25 @@ public class BaseUtils {
|
||||||
|
|
||||||
byte b;
|
byte b;
|
||||||
int index;
|
int index;
|
||||||
|
|
||||||
private Base(char base, int index) {
|
private Base(char base, int index) {
|
||||||
this.b = (byte) base;
|
this.b = (byte) base;
|
||||||
this.index = index;
|
this.index = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte getBase() { return b; }
|
public byte getBase() { return b; }
|
||||||
|
|
||||||
public char getBaseAsChar() { return (char) b; }
|
public char getBaseAsChar() { return (char) b; }
|
||||||
|
|
||||||
public int getIndex() { return index; }
|
public int getIndex() { return index; }
|
||||||
|
|
||||||
public boolean sameBase(byte o) { return b == o; }
|
public boolean sameBase(byte o) { return b == o; }
|
||||||
|
|
||||||
public boolean sameBase(char o) { return b == (byte) o; }
|
public boolean sameBase(char o) { return b == (byte) o; }
|
||||||
|
|
||||||
public boolean sameBase(int i) { return index == i; }
|
public boolean sameBase(int i) { return index == i; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// todo -- fix me (enums?)
|
// todo -- fix me (enums?)
|
||||||
public static final byte DELETION_INDEX = 4;
|
public static final byte DELETION_INDEX = 4;
|
||||||
public static final byte NO_CALL_INDEX = 5; // (this is 'N')
|
public static final byte NO_CALL_INDEX = 5; // (this is 'N')
|
||||||
|
|
@ -53,7 +56,6 @@ public class BaseUtils {
|
||||||
public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
|
public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
|
||||||
public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');
|
public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');
|
||||||
|
|
||||||
|
|
||||||
/// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
|
/// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
|
||||||
// a pyrimidine to another pyrimidine nucleotide (C <-> T).
|
// a pyrimidine to another pyrimidine nucleotide (C <-> T).
|
||||||
// Approximately two out of every three single nucleotide polymorphisms (SNPs) are transitions.
|
// Approximately two out of every three single nucleotide polymorphisms (SNPs) are transitions.
|
||||||
|
|
@ -64,6 +66,7 @@ public class BaseUtils {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the base substitution type of the 2 state SNP
|
* Returns the base substitution type of the 2 state SNP
|
||||||
|
*
|
||||||
* @param base1
|
* @param base1
|
||||||
* @param base2
|
* @param base2
|
||||||
* @return
|
* @return
|
||||||
|
|
@ -85,7 +88,9 @@ public class BaseUtils {
|
||||||
return !isTransition(base1, base2);
|
return !isTransition(base1, base2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Private constructor. No instantiating this class! */
|
/**
|
||||||
|
* Private constructor. No instantiating this class!
|
||||||
|
*/
|
||||||
private BaseUtils() {}
|
private BaseUtils() {}
|
||||||
|
|
||||||
static public boolean basesAreEqual(byte base1, byte base2) {
|
static public boolean basesAreEqual(byte base1, byte base2) {
|
||||||
|
|
@ -96,7 +101,6 @@ public class BaseUtils {
|
||||||
return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
|
return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a IUPAC nucleotide code to a pair of bases
|
* Converts a IUPAC nucleotide code to a pair of bases
|
||||||
*
|
*
|
||||||
|
|
@ -170,22 +174,26 @@ public class BaseUtils {
|
||||||
switch (base) {
|
switch (base) {
|
||||||
case '*': // the wildcard character counts as an A
|
case '*': // the wildcard character counts as an A
|
||||||
case 'A':
|
case 'A':
|
||||||
case 'a': return 0;
|
case 'a':
|
||||||
|
return 0;
|
||||||
|
|
||||||
case 'C':
|
case 'C':
|
||||||
case 'c': return 1;
|
case 'c':
|
||||||
|
return 1;
|
||||||
|
|
||||||
case 'G':
|
case 'G':
|
||||||
case 'g': return 2;
|
case 'g':
|
||||||
|
return 2;
|
||||||
|
|
||||||
case 'T':
|
case 'T':
|
||||||
case 't': return 3;
|
case 't':
|
||||||
|
return 3;
|
||||||
|
|
||||||
default: return -1;
|
default:
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a simple base to a base index
|
* Converts a simple base to a base index
|
||||||
*
|
*
|
||||||
|
|
@ -197,29 +205,37 @@ public class BaseUtils {
|
||||||
switch (base) {
|
switch (base) {
|
||||||
case '*': // the wildcard character counts as an A
|
case '*': // the wildcard character counts as an A
|
||||||
case 'A':
|
case 'A':
|
||||||
case 'a': return 0;
|
case 'a':
|
||||||
|
return 0;
|
||||||
|
|
||||||
case 'C':
|
case 'C':
|
||||||
case 'c': return 1;
|
case 'c':
|
||||||
|
return 1;
|
||||||
|
|
||||||
case 'G':
|
case 'G':
|
||||||
case 'g': return 2;
|
case 'g':
|
||||||
|
return 2;
|
||||||
|
|
||||||
case 'T':
|
case 'T':
|
||||||
case 't': return 3;
|
case 't':
|
||||||
|
return 3;
|
||||||
|
|
||||||
default: return -1;
|
default:
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static public int extendedBaseToBaseIndex(byte base) {
|
static public int extendedBaseToBaseIndex(byte base) {
|
||||||
switch (base) {
|
switch (base) {
|
||||||
case 'd':
|
case 'd':
|
||||||
case 'D': return DELETION_INDEX;
|
case 'D':
|
||||||
|
return DELETION_INDEX;
|
||||||
case 'n':
|
case 'n':
|
||||||
case 'N': return NO_CALL_INDEX;
|
case 'N':
|
||||||
|
return NO_CALL_INDEX;
|
||||||
|
|
||||||
default: return simpleBaseToBaseIndex(base);
|
default:
|
||||||
|
return simpleBaseToBaseIndex(base);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -232,11 +248,6 @@ public class BaseUtils {
|
||||||
return simpleBaseToBaseIndex(base) != -1;
|
return simpleBaseToBaseIndex(base) != -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Deprecated
|
|
||||||
static public boolean isNBase(char base) {
|
|
||||||
return isNBase((byte)base);
|
|
||||||
}
|
|
||||||
|
|
||||||
static public boolean isNBase(byte base) {
|
static public boolean isNBase(byte base) {
|
||||||
return base == 'N' || base == 'n';
|
return base == 'N' || base == 'n';
|
||||||
}
|
}
|
||||||
|
|
@ -249,11 +260,16 @@ public class BaseUtils {
|
||||||
*/
|
*/
|
||||||
static public byte baseIndexToSimpleBase(int baseIndex) {
|
static public byte baseIndexToSimpleBase(int baseIndex) {
|
||||||
switch (baseIndex) {
|
switch (baseIndex) {
|
||||||
case 0: return 'A';
|
case 0:
|
||||||
case 1: return 'C';
|
return 'A';
|
||||||
case 2: return 'G';
|
case 1:
|
||||||
case 3: return 'T';
|
return 'C';
|
||||||
default: return '.';
|
case 2:
|
||||||
|
return 'G';
|
||||||
|
case 3:
|
||||||
|
return 'T';
|
||||||
|
default:
|
||||||
|
return '.';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -270,11 +286,16 @@ public class BaseUtils {
|
||||||
*/
|
*/
|
||||||
static public int crossTalkPartnerIndex(int baseIndex) {
|
static public int crossTalkPartnerIndex(int baseIndex) {
|
||||||
switch (baseIndex) {
|
switch (baseIndex) {
|
||||||
case 0: return 1; // A -> C
|
case 0:
|
||||||
case 1: return 0; // C -> A
|
return 1; // A -> C
|
||||||
case 2: return 3; // G -> T
|
case 1:
|
||||||
case 3: return 2; // T -> G
|
return 0; // C -> A
|
||||||
default: return -1;
|
case 2:
|
||||||
|
return 3; // G -> T
|
||||||
|
case 3:
|
||||||
|
return 2; // T -> G
|
||||||
|
default:
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -297,11 +318,16 @@ public class BaseUtils {
|
||||||
*/
|
*/
|
||||||
static public byte complementIndex(int baseIndex) {
|
static public byte complementIndex(int baseIndex) {
|
||||||
switch (baseIndex) {
|
switch (baseIndex) {
|
||||||
case 0: return 3; // a -> t
|
case 0:
|
||||||
case 1: return 2; // c -> g
|
return 3; // a -> t
|
||||||
case 2: return 1; // g -> c
|
case 1:
|
||||||
case 3: return 0; // t -> a
|
return 2; // c -> g
|
||||||
default: return -1; // wtf?
|
case 2:
|
||||||
|
return 1; // g -> c
|
||||||
|
case 3:
|
||||||
|
return 0; // t -> a
|
||||||
|
default:
|
||||||
|
return -1; // wtf?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -314,14 +340,19 @@ public class BaseUtils {
|
||||||
static public byte simpleComplement(byte base) {
|
static public byte simpleComplement(byte base) {
|
||||||
switch (base) {
|
switch (base) {
|
||||||
case 'A':
|
case 'A':
|
||||||
case 'a': return 'T';
|
case 'a':
|
||||||
|
return 'T';
|
||||||
case 'C':
|
case 'C':
|
||||||
case 'c': return 'G';
|
case 'c':
|
||||||
|
return 'G';
|
||||||
case 'G':
|
case 'G':
|
||||||
case 'g': return 'C';
|
case 'g':
|
||||||
|
return 'C';
|
||||||
case 'T':
|
case 'T':
|
||||||
case 't': return 'A';
|
case 't':
|
||||||
default: return base;
|
return 'A';
|
||||||
|
default:
|
||||||
|
return base;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -407,7 +438,6 @@ public class BaseUtils {
|
||||||
return new String(simpleReverseComplement(bases.getBytes()));
|
return new String(simpleReverseComplement(bases.getBytes()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Complement a String of bases. Preserves ambiguous bases.
|
* Complement a String of bases. Preserves ambiguous bases.
|
||||||
*
|
*
|
||||||
|
|
@ -532,8 +562,8 @@ public class BaseUtils {
|
||||||
return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
|
return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
/** Computes the smallest period >= minPeriod for the specified string. The period is defined as such p,
|
* Computes the smallest period >= minPeriod for the specified string. The period is defined as such p,
|
||||||
* that for all i = 0... seq.length-1, seq[ i % p ] = seq[i] (or equivalently seq[i] = seq[i+p] for i=0...seq.length-1-p).
|
* that for all i = 0... seq.length-1, seq[ i % p ] = seq[i] (or equivalently seq[i] = seq[i+p] for i=0...seq.length-1-p).
|
||||||
* The sequence does <i>not</i> have to contain whole number of periods. For instance, "ACACACAC" has a period
|
* The sequence does <i>not</i> have to contain whole number of periods. For instance, "ACACACAC" has a period
|
||||||
* of 2 (it has a period of 4 as well), and so does
|
* of 2 (it has a period of 4 as well), and so does
|
||||||
|
|
@ -554,9 +584,7 @@ public class BaseUtils {
|
||||||
int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
|
int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
|
||||||
// if our current hypothesis holds, base[pos] must be the same as base[offset]
|
// if our current hypothesis holds, base[pos] must be the same as base[offset]
|
||||||
|
|
||||||
if ( Character.toUpperCase( seq[pos] ) !=
|
if (Character.toUpperCase(seq[pos]) != Character.toUpperCase(seq[offset])) {
|
||||||
Character.toUpperCase( seq[offset] )
|
|
||||||
) {
|
|
||||||
|
|
||||||
// period we have been trying so far does not work.
|
// period we have been trying so far does not work.
|
||||||
// two possibilities:
|
// two possibilities:
|
||||||
|
|
@ -569,8 +597,10 @@ public class BaseUtils {
|
||||||
// we set period to pos (thus trying the hypothesis that bases from start up to the current one,
|
// we set period to pos (thus trying the hypothesis that bases from start up to the current one,
|
||||||
// non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
|
// non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
|
||||||
// on the next loop re-entrance after pos is autoincremented)
|
// on the next loop re-entrance after pos is autoincremented)
|
||||||
if ( offset == 0 ) period = pos+1;
|
if (offset == 0)
|
||||||
else period = pos-- ;
|
period = pos + 1;
|
||||||
|
else
|
||||||
|
period = pos--;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue