BaseQualityScoreRecalibration walker (bqsr v2) first commit includes
* Adding the context covariate standard in both modes (including old CountCovariates) with parameters * Updating all covariates and modules to use GATKSAMRecord throughout the code. * BQSR now processes indels in the pileup (but doesn't do anything with them yet)
This commit is contained in:
parent
0717c79901
commit
6e6f0f10e1
|
|
@ -25,8 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
@ -38,23 +39,27 @@ import java.util.Arrays;
|
||||||
|
|
||||||
public class ContextCovariate implements ExperimentalCovariate {
|
public class ContextCovariate implements ExperimentalCovariate {
|
||||||
|
|
||||||
final int CONTEXT_SIZE = 8;
|
private int CONTEXT_SIZE;
|
||||||
String allN = "";
|
private String allN = "";
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||||
for( int iii = 0; iii < CONTEXT_SIZE; iii++ ) {
|
CONTEXT_SIZE = RAC.CONTEXT_SIZE;
|
||||||
|
|
||||||
|
if (CONTEXT_SIZE <= 0)
|
||||||
|
throw new UserException("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead");
|
||||||
|
|
||||||
|
// initialize allN given the size of the context
|
||||||
|
for (int i = 0; i < CONTEXT_SIZE; i++)
|
||||||
allN += "N";
|
allN += "N";
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
byte[] bases = read.getReadBases();
|
byte[] bases = read.getReadBases();
|
||||||
for(int i = 0; i < read.getReadLength(); i++) {
|
for (int i = 0; i < read.getReadLength(); i++)
|
||||||
comparable[i] = ( i-CONTEXT_SIZE < 0 ? allN : new String(Arrays.copyOfRange(bases,i-CONTEXT_SIZE,i)) );
|
comparable[i] = (i < CONTEXT_SIZE) ? allN : new String(Arrays.copyOfRange(bases, i - CONTEXT_SIZE, i));
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -40,18 +40,17 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
|
||||||
public interface Covariate {
|
public interface Covariate {
|
||||||
public void initialize(RecalibrationArgumentCollection RAC); // Initialize any member variables using the command-line arguments passed to the walkers
|
public void initialize(RecalibrationArgumentCollection RAC); // Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
|
|
||||||
public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
public Comparable getValue(String str); // Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||||
public void getValues( SAMRecord read, Comparable[] comparable, BaseRecalibration.BaseRecalibrationType modelType );
|
|
||||||
|
public void getValues(GATKSAMRecord read, Comparable[] comparable, BaseRecalibration.BaseRecalibrationType modelType);
|
||||||
//Takes an array of size (at least) read.getReadLength() and fills it with covariate
|
//Takes an array of size (at least) read.getReadLength() and fills it with covariate
|
||||||
//values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
|
//values for each position in the read. This method was created as an optimization over calling getValue( read, offset ) for each offset and allows
|
||||||
//read-specific calculations to be done just once rather than for each offset.
|
//read-specific calculations to be done just once rather than for each offset.
|
||||||
}
|
}
|
||||||
|
|
||||||
interface RequiredCovariate extends Covariate {
|
interface RequiredCovariate extends Covariate {}
|
||||||
}
|
|
||||||
|
|
||||||
interface StandardCovariate extends Covariate {
|
interface StandardCovariate extends Covariate {}
|
||||||
}
|
|
||||||
|
|
||||||
interface ExperimentalCovariate extends Covariate {
|
interface ExperimentalCovariate extends Covariate {}
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.NGSPlatform;
|
import org.broadinstitute.sting.utils.NGSPlatform;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -58,7 +57,8 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
if (RAC.DEFAULT_PLATFORM.equalsIgnoreCase("SLX") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("ILLUMINA") ||
|
if (RAC.DEFAULT_PLATFORM.equalsIgnoreCase("SLX") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("ILLUMINA") ||
|
||||||
RAC.DEFAULT_PLATFORM.contains("454") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("SOLID") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("ABI_SOLID")) {
|
RAC.DEFAULT_PLATFORM.contains("454") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("SOLID") || RAC.DEFAULT_PLATFORM.equalsIgnoreCase("ABI_SOLID")) {
|
||||||
// nothing to do
|
// nothing to do
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform. Implemented options are illumina, 454, and solid");
|
throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform. Implemented options are illumina, 454, and solid");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -66,13 +66,13 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
|
|
||||||
//-----------------------------
|
//-----------------------------
|
||||||
// Illumina, Solid, PacBio, and Complete Genomics
|
// Illumina, Solid, PacBio, and Complete Genomics
|
||||||
//-----------------------------
|
//-----------------------------
|
||||||
|
|
||||||
final NGSPlatform ngsPlatform = ((GATKSAMRecord)read).getNGSPlatform();
|
final NGSPlatform ngsPlatform = read.getNGSPlatform();
|
||||||
if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
|
if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
|
||||||
final int init;
|
final int init;
|
||||||
final int increment;
|
final int increment;
|
||||||
|
|
@ -88,21 +88,20 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
init = -1;
|
init = -1;
|
||||||
increment = -1;
|
increment = -1;
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
{
|
|
||||||
//first of pair, positive strand
|
//first of pair, positive strand
|
||||||
init = 1;
|
init = 1;
|
||||||
increment = 1;
|
increment = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
|
if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
|
||||||
//second of pair, negative strand
|
//second of pair, negative strand
|
||||||
init = -read.getReadLength();
|
init = -read.getReadLength();
|
||||||
increment = 1;
|
increment = 1;
|
||||||
}
|
}
|
||||||
else
|
else {
|
||||||
{
|
|
||||||
//first of pair, negative strand
|
//first of pair, negative strand
|
||||||
init = read.getReadLength();
|
init = read.getReadLength();
|
||||||
increment = -1;
|
increment = -1;
|
||||||
|
|
@ -138,26 +137,65 @@ public class CycleCovariate implements StandardCovariate {
|
||||||
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
||||||
if (!read.getReadNegativeStrandFlag()) { // Forward direction
|
if (!read.getReadNegativeStrandFlag()) { // Forward direction
|
||||||
int iii = 0;
|
int iii = 0;
|
||||||
while( iii < readLength )
|
while (iii < readLength) {
|
||||||
{
|
while (iii < readLength && bases[iii] == (byte) 'T') {
|
||||||
while( iii < readLength && bases[iii] == (byte)'T' ) { comparable[iii] = cycle; iii++; }
|
comparable[iii] = cycle;
|
||||||
while( iii < readLength && bases[iii] == (byte)'A' ) { comparable[iii] = cycle; iii++; }
|
iii++;
|
||||||
while( iii < readLength && bases[iii] == (byte)'C' ) { comparable[iii] = cycle; iii++; }
|
}
|
||||||
while( iii < readLength && bases[iii] == (byte)'G' ) { comparable[iii] = cycle; iii++; }
|
while (iii < readLength && bases[iii] == (byte) 'A') {
|
||||||
if( iii < readLength ) { if (multiplyByNegative1) cycle--; else cycle++; }
|
comparable[iii] = cycle;
|
||||||
if( iii < readLength && !BaseUtils.isRegularBase(bases[iii]) ) { comparable[iii] = cycle; iii++; }
|
iii++;
|
||||||
|
}
|
||||||
|
while (iii < readLength && bases[iii] == (byte) 'C') {
|
||||||
|
comparable[iii] = cycle;
|
||||||
|
iii++;
|
||||||
|
}
|
||||||
|
while (iii < readLength && bases[iii] == (byte) 'G') {
|
||||||
|
comparable[iii] = cycle;
|
||||||
|
iii++;
|
||||||
|
}
|
||||||
|
if (iii < readLength) {
|
||||||
|
if (multiplyByNegative1)
|
||||||
|
cycle--;
|
||||||
|
else
|
||||||
|
cycle++;
|
||||||
|
}
|
||||||
|
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
|
||||||
|
comparable[iii] = cycle;
|
||||||
|
iii++;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
} else { // Negative direction
|
}
|
||||||
|
else { // Negative direction
|
||||||
int iii = readLength - 1;
|
int iii = readLength - 1;
|
||||||
while( iii >= 0 )
|
while (iii >= 0) {
|
||||||
{
|
while (iii >= 0 && bases[iii] == (byte) 'T') {
|
||||||
while( iii >= 0 && bases[iii] == (byte)'T' ) { comparable[iii] = cycle; iii--; }
|
comparable[iii] = cycle;
|
||||||
while( iii >= 0 && bases[iii] == (byte)'A' ) { comparable[iii] = cycle; iii--; }
|
iii--;
|
||||||
while( iii >= 0 && bases[iii] == (byte)'C' ) { comparable[iii] = cycle; iii--; }
|
}
|
||||||
while( iii >= 0 && bases[iii] == (byte)'G' ) { comparable[iii] = cycle; iii--; }
|
while (iii >= 0 && bases[iii] == (byte) 'A') {
|
||||||
if( iii >= 0 ) { if (multiplyByNegative1) cycle--; else cycle++; }
|
comparable[iii] = cycle;
|
||||||
if( iii >= 0 && !BaseUtils.isRegularBase(bases[iii]) ) { comparable[iii] = cycle; iii--; }
|
iii--;
|
||||||
|
}
|
||||||
|
while (iii >= 0 && bases[iii] == (byte) 'C') {
|
||||||
|
comparable[iii] = cycle;
|
||||||
|
iii--;
|
||||||
|
}
|
||||||
|
while (iii >= 0 && bases[iii] == (byte) 'G') {
|
||||||
|
comparable[iii] = cycle;
|
||||||
|
iii--;
|
||||||
|
}
|
||||||
|
if (iii >= 0) {
|
||||||
|
if (multiplyByNegative1)
|
||||||
|
cycle--;
|
||||||
|
else
|
||||||
|
cycle++;
|
||||||
|
}
|
||||||
|
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
|
||||||
|
comparable[iii] = cycle;
|
||||||
|
iii--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,8 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
|
@ -66,7 +66,7 @@ public class DinucCovariate implements StandardCovariate {
|
||||||
* Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
|
* Takes an array of size (at least) read.getReadLength() and fills it with the covariate values for each position in the read.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
|
final HashMap<Integer, Dinuc> dinucHashMapRef = this.dinucHashMap; //optimize access to dinucHashMap
|
||||||
final int readLength = read.getReadLength();
|
final int readLength = read.getReadLength();
|
||||||
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||||
|
|
@ -89,7 +89,8 @@ public class DinucCovariate implements StandardCovariate {
|
||||||
base = bases[offset];
|
base = bases[offset];
|
||||||
if (BaseUtils.isRegularBase(prevBase)) {
|
if (BaseUtils.isRegularBase(prevBase)) {
|
||||||
comparable[offset] = dinucHashMapRef.get(Dinuc.hashBytes(prevBase, base));
|
comparable[offset] = dinucHashMapRef.get(Dinuc.hashBytes(prevBase, base));
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
comparable[offset] = NO_DINUC;
|
comparable[offset] = NO_DINUC;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -115,7 +116,7 @@ public class DinucCovariate implements StandardCovariate {
|
||||||
/**
|
/**
|
||||||
* Reverses the given array in place.
|
* Reverses the given array in place.
|
||||||
*
|
*
|
||||||
* @param array
|
* @param array any array
|
||||||
*/
|
*/
|
||||||
private static void reverse(final Comparable[] array) {
|
private static void reverse(final Comparable[] array) {
|
||||||
final int arrayLength = array.length;
|
final int arrayLength = array.length;
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010 The Broad Institute
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
|
@ -39,7 +40,7 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
|
||||||
public class GCContentCovariate implements ExperimentalCovariate {
|
public class GCContentCovariate implements ExperimentalCovariate {
|
||||||
|
|
||||||
int numBack = 7;
|
private int numBack = 7;
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -48,20 +49,21 @@ public class GCContentCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
private final Comparable getValue( final SAMRecord read, final int offset ) {
|
private Comparable getValue(final SAMRecord read, final int offset) {
|
||||||
|
|
||||||
// ATTGCCCCGTAAAAAAAGAGAA
|
// ATTGCCCCGTAAAAAAAGAGAA
|
||||||
// 0000123456654321001122
|
// 0000123456654321001122
|
||||||
|
|
||||||
if (read.getReadGroup().getPlatform().equalsIgnoreCase("ILLUMINA") || read.getReadGroup().getPlatform().equalsIgnoreCase("SLX")) {
|
if (read.getReadGroup().getPlatform().equalsIgnoreCase("ILLUMINA") || read.getReadGroup().getPlatform().equalsIgnoreCase("SLX")) {
|
||||||
int numGC = 0;
|
int numGC = 0;
|
||||||
int startPos = 0;
|
int startPos;
|
||||||
int stopPos = 0;
|
int stopPos;
|
||||||
final byte[] bases = read.getReadBases();
|
final byte[] bases = read.getReadBases();
|
||||||
if (!read.getReadNegativeStrandFlag()) { // Forward direction
|
if (!read.getReadNegativeStrandFlag()) { // Forward direction
|
||||||
startPos = Math.max(offset - numBack, 0);
|
startPos = Math.max(offset - numBack, 0);
|
||||||
stopPos = Math.max(offset - 1, 0);
|
stopPos = Math.max(offset - 1, 0);
|
||||||
} else { // Negative direction
|
}
|
||||||
|
else { // Negative direction
|
||||||
startPos = Math.min(offset + 2, bases.length);
|
startPos = Math.min(offset + 2, bases.length);
|
||||||
stopPos = Math.min(offset + numBack + 1, bases.length);
|
stopPos = Math.min(offset + numBack + 1, bases.length);
|
||||||
}
|
}
|
||||||
|
|
@ -73,13 +75,14 @@ public class GCContentCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
return numGC;
|
return numGC;
|
||||||
} else { // This effect is specific to the Illumina platform
|
}
|
||||||
|
else { // This effect is specific to the Illumina platform
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -41,7 +42,7 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
|
||||||
public class HomopolymerCovariate implements ExperimentalCovariate {
|
public class HomopolymerCovariate implements ExperimentalCovariate {
|
||||||
|
|
||||||
int numBack = 7;
|
private int numBack;
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -50,7 +51,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
private final Comparable getValue( final SAMRecord read, final int offset ) {
|
private Comparable getValue(final SAMRecord read, final int offset) {
|
||||||
|
|
||||||
// This block of code is for if you don't want to only count consecutive bases
|
// This block of code is for if you don't want to only count consecutive bases
|
||||||
// ATTGCCCCGTAAAAAAAAATA
|
// ATTGCCCCGTAAAAAAAAATA
|
||||||
|
|
@ -82,7 +83,8 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
|
||||||
numAgree++;
|
numAgree++;
|
||||||
iii++;
|
iii++;
|
||||||
}
|
}
|
||||||
} else { // Negative direction
|
}
|
||||||
|
else { // Negative direction
|
||||||
while (iii >= 1 && bases[iii] == bases[iii - 1] && numAgree < numBack) {
|
while (iii >= 1 && bases[iii] == bases[iii - 1] && numAgree < numBack) {
|
||||||
numAgree++;
|
numAgree++;
|
||||||
iii--;
|
iii--;
|
||||||
|
|
@ -93,7 +95,7 @@ public class HomopolymerCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -44,7 +44,7 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
private final Comparable getValue( final SAMRecord read, final int offset ) {
|
private Comparable getValue(final GATKSAMRecord read) {
|
||||||
return read.getMappingQuality();
|
return read.getMappingQuality();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -55,9 +55,9 @@ public class MappingQualityCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -48,7 +49,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
private final Comparable getValue( final SAMRecord read, final int offset ) {
|
private Comparable getValue(final SAMRecord read, final int offset) {
|
||||||
|
|
||||||
// Loop over the list of base quality scores in the window and find the minimum
|
// Loop over the list of base quality scores in the window and find the minimum
|
||||||
final byte[] quals = read.getBaseQualities();
|
final byte[] quals = read.getBaseQualities();
|
||||||
|
|
@ -64,7 +65,7 @@ public class MinimumNQSCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -45,7 +46,7 @@ public class PositionCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
private final Comparable getValue( final SAMRecord read, final int offset ) {
|
private Comparable getValue(final SAMRecord read, final int offset) {
|
||||||
int cycle = offset;
|
int cycle = offset;
|
||||||
if (read.getReadNegativeStrandFlag()) {
|
if (read.getReadNegativeStrandFlag()) {
|
||||||
cycle = read.getReadLength() - (offset + 1);
|
cycle = read.getReadLength() - (offset + 1);
|
||||||
|
|
@ -54,7 +55,7 @@ public class PositionCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -46,21 +47,22 @@ public class PrimerRoundCovariate implements ExperimentalCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used to pick out the covariate's value from attributes of the read
|
// Used to pick out the covariate's value from attributes of the read
|
||||||
private final Comparable getValue( final SAMRecord read, final int offset ) {
|
private Comparable getValue(final SAMRecord read, final int offset) {
|
||||||
if (read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") || read.getReadGroup().getPlatform().equalsIgnoreCase("ABI_SOLID")) {
|
if (read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") || read.getReadGroup().getPlatform().equalsIgnoreCase("ABI_SOLID")) {
|
||||||
int pos = offset;
|
int pos = offset;
|
||||||
if (read.getReadNegativeStrandFlag()) {
|
if (read.getReadNegativeStrandFlag()) {
|
||||||
pos = read.getReadLength() - (offset + 1);
|
pos = read.getReadLength() - (offset + 1);
|
||||||
}
|
}
|
||||||
return pos % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
return pos % 5; // the primer round according to http://www3.appliedbiosystems.com/cms/groups/mcb_marketing/documents/generaldocuments/cms_057511.pdf
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
return 1; // nothing to do here because it is always the same
|
return 1; // nothing to do here because it is always the same
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
for (int iii = 0; iii < read.getReadLength(); iii++) {
|
||||||
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
comparable[iii] = getValue(read, iii); // BUGBUG: this can be optimized
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
@ -46,13 +46,14 @@ public class QualityScoreCovariate implements RequiredCovariate {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
if (modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION) {
|
if (modelType == BaseRecalibration.BaseRecalibrationType.BASE_SUBSTITUTION) {
|
||||||
byte[] baseQualities = read.getBaseQualities();
|
byte[] baseQualities = read.getBaseQualities();
|
||||||
for (int i = 0; i < read.getReadLength(); i++) {
|
for (int i = 0; i < read.getReadLength(); i++) {
|
||||||
comparable[i] = (int) baseQualities[i];
|
comparable[i] = (int) baseQualities[i];
|
||||||
}
|
}
|
||||||
} else { // model == BASE_INSERTION || model == BASE_DELETION
|
}
|
||||||
|
else { // model == BASE_INSERTION || model == BASE_DELETION
|
||||||
Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
Arrays.fill(comparable, 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
|
||||||
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -38,15 +38,13 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
|
||||||
|
|
||||||
public class ReadGroupCovariate implements RequiredCovariate {
|
public class ReadGroupCovariate implements RequiredCovariate {
|
||||||
|
|
||||||
public static final String defaultReadGroup = "DefaultReadGroup";
|
|
||||||
|
|
||||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||||
@Override
|
@Override
|
||||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void getValues( final SAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType ) {
|
public void getValues(final GATKSAMRecord read, final Comparable[] comparable, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
final String readGroupId = read.getReadGroup().getReadGroupId();
|
final String readGroupId = read.getReadGroup().getReadGroupId();
|
||||||
for (int i = 0; i < read.getReadLength(); i++) {
|
for (int i = 0; i < read.getReadLength(); i++) {
|
||||||
comparable[i] = readGroupId;
|
comparable[i] = readGroupId;
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,6 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
package org.broadinstitute.sting.gatk.walkers.recalibration;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
|
||||||
import net.sf.samtools.SAMUtils;
|
import net.sf.samtools.SAMUtils;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
@ -67,22 +66,36 @@ public class RecalDataManager {
|
||||||
private static boolean warnUserNullPlatform = false;
|
private static boolean warnUserNullPlatform = false;
|
||||||
|
|
||||||
public enum SOLID_RECAL_MODE {
|
public enum SOLID_RECAL_MODE {
|
||||||
/** Treat reference inserted bases as reference matching bases. Very unsafe! */
|
/**
|
||||||
|
* Treat reference inserted bases as reference matching bases. Very unsafe!
|
||||||
|
*/
|
||||||
DO_NOTHING,
|
DO_NOTHING,
|
||||||
/** Set reference inserted bases and the previous base (because of color space alignment details) to Q0. This is the default option. */
|
/**
|
||||||
|
* Set reference inserted bases and the previous base (because of color space alignment details) to Q0. This is the default option.
|
||||||
|
*/
|
||||||
SET_Q_ZERO,
|
SET_Q_ZERO,
|
||||||
/** In addition to setting the quality scores to zero, also set the base itself to 'N'. This is useful to visualize in IGV. */
|
/**
|
||||||
|
* In addition to setting the quality scores to zero, also set the base itself to 'N'. This is useful to visualize in IGV.
|
||||||
|
*/
|
||||||
SET_Q_ZERO_BASE_N,
|
SET_Q_ZERO_BASE_N,
|
||||||
/** Look at the color quality scores and probabilistically decide to change the reference inserted base to be the base which is implied by the original color space instead of the reference. */
|
/**
|
||||||
|
* Look at the color quality scores and probabilistically decide to change the reference inserted base to be the base which is implied by the original color space instead of the reference.
|
||||||
|
*/
|
||||||
REMOVE_REF_BIAS
|
REMOVE_REF_BIAS
|
||||||
}
|
}
|
||||||
|
|
||||||
public enum SOLID_NOCALL_STRATEGY {
|
public enum SOLID_NOCALL_STRATEGY {
|
||||||
/** When a no call is detected throw an exception to alert the user that recalibrating this SOLiD data is unsafe. This is the default option. */
|
/**
|
||||||
|
* When a no call is detected throw an exception to alert the user that recalibrating this SOLiD data is unsafe. This is the default option.
|
||||||
|
*/
|
||||||
THROW_EXCEPTION,
|
THROW_EXCEPTION,
|
||||||
/** Leave the read in the output bam completely untouched. This mode is only okay if the no calls are very rare. */
|
/**
|
||||||
|
* Leave the read in the output bam completely untouched. This mode is only okay if the no calls are very rare.
|
||||||
|
*/
|
||||||
LEAVE_READ_UNRECALIBRATED,
|
LEAVE_READ_UNRECALIBRATED,
|
||||||
/** Mark these reads as failing vendor quality checks so they can be filtered out by downstream analyses. */
|
/**
|
||||||
|
* Mark these reads as failing vendor quality checks so they can be filtered out by downstream analyses.
|
||||||
|
*/
|
||||||
PURGE_READ
|
PURGE_READ
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -102,7 +115,8 @@ public class RecalDataManager {
|
||||||
for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
|
for (int iii = 0; iii < numCovariates - 2; iii++) { // readGroup and QualityScore aren't counted here, their tables are separate
|
||||||
dataCollapsedByCovariate.add(new NestedHashMap());
|
dataCollapsedByCovariate.add(new NestedHashMap());
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
data = new NestedHashMap();
|
data = new NestedHashMap();
|
||||||
dataCollapsedReadGroup = null;
|
dataCollapsedReadGroup = null;
|
||||||
dataCollapsedQualityScore = null;
|
dataCollapsedQualityScore = null;
|
||||||
|
|
@ -112,6 +126,7 @@ public class RecalDataManager {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add the given mapping to all of the collapsed hash tables
|
* Add the given mapping to all of the collapsed hash tables
|
||||||
|
*
|
||||||
* @param key The list of comparables that is the key for this mapping
|
* @param key The list of comparables that is the key for this mapping
|
||||||
* @param fullDatum The RecalDatum which is the data for this mapping
|
* @param fullDatum The RecalDatum which is the data for this mapping
|
||||||
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
|
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
|
||||||
|
|
@ -133,7 +148,8 @@ public class RecalDataManager {
|
||||||
collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(readGroupCollapsedKey);
|
collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(readGroupCollapsedKey);
|
||||||
if (collapsedDatum == null) {
|
if (collapsedDatum == null) {
|
||||||
dataCollapsedReadGroup.put(new RecalDatum(fullDatum), readGroupCollapsedKey);
|
dataCollapsedReadGroup.put(new RecalDatum(fullDatum), readGroupCollapsedKey);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
|
collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -144,7 +160,8 @@ public class RecalDataManager {
|
||||||
collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(qualityScoreCollapsedKey);
|
collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(qualityScoreCollapsedKey);
|
||||||
if (collapsedDatum == null) {
|
if (collapsedDatum == null) {
|
||||||
dataCollapsedQualityScore.put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
|
dataCollapsedQualityScore.put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
collapsedDatum.increment(fullDatum);
|
collapsedDatum.increment(fullDatum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -158,7 +175,8 @@ public class RecalDataManager {
|
||||||
collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get(covariateCollapsedKey);
|
collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(iii).get(covariateCollapsedKey);
|
||||||
if (collapsedDatum == null) {
|
if (collapsedDatum == null) {
|
||||||
dataCollapsedByCovariate.get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
|
dataCollapsedByCovariate.get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
collapsedDatum.increment(fullDatum);
|
collapsedDatum.increment(fullDatum);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -168,6 +186,7 @@ public class RecalDataManager {
|
||||||
/**
|
/**
|
||||||
* Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score
|
* Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score
|
||||||
* that will be used in the sequential calculation in TableRecalibrationWalker
|
* that will be used in the sequential calculation in TableRecalibrationWalker
|
||||||
|
*
|
||||||
* @param smoothing The smoothing parameter that goes into empirical quality score calculation
|
* @param smoothing The smoothing parameter that goes into empirical quality score calculation
|
||||||
* @param maxQual At which value to cap the quality scores
|
* @param maxQual At which value to cap the quality scores
|
||||||
*/
|
*/
|
||||||
|
|
@ -187,7 +206,8 @@ public class RecalDataManager {
|
||||||
final Object val = data.get(comp);
|
final Object val = data.get(comp);
|
||||||
if (val instanceof RecalDatum) { // We are at the end of the nested hash maps
|
if (val instanceof RecalDatum) { // We are at the end of the nested hash maps
|
||||||
((RecalDatum) val).calcCombinedEmpiricalQuality(smoothing, maxQual);
|
((RecalDatum) val).calcCombinedEmpiricalQuality(smoothing, maxQual);
|
||||||
} else { // Another layer in the nested hash map
|
}
|
||||||
|
else { // Another layer in the nested hash map
|
||||||
recursivelyGenerateEmpiricalQualities((Map) val, smoothing, maxQual);
|
recursivelyGenerateEmpiricalQualities((Map) val, smoothing, maxQual);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -202,7 +222,8 @@ public class RecalDataManager {
|
||||||
data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ...
|
data.clear(); // don't TableRecalibrate a non-required covariate if it only has one element because that correction has already been done ...
|
||||||
// in a previous step of the sequential calculation model
|
// in a previous step of the sequential calculation model
|
||||||
}
|
}
|
||||||
} else { // Another layer in the nested hash map
|
}
|
||||||
|
else { // Another layer in the nested hash map
|
||||||
checkForSingletons((Map) val);
|
checkForSingletons((Map) val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -210,25 +231,29 @@ public class RecalDataManager {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the appropriate collapsed table out of the set of all the tables held by this Object
|
* Get the appropriate collapsed table out of the set of all the tables held by this Object
|
||||||
|
*
|
||||||
* @param covariate Which covariate indexes the desired collapsed HashMap
|
* @param covariate Which covariate indexes the desired collapsed HashMap
|
||||||
* @return The desired collapsed HashMap
|
* @return The desired collapsed HashMap
|
||||||
*/
|
*/
|
||||||
public final NestedHashMap getCollapsedTable(final int covariate) {
|
public final NestedHashMap getCollapsedTable(final int covariate) {
|
||||||
if (covariate == 0) {
|
if (covariate == 0) {
|
||||||
return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
return dataCollapsedReadGroup; // Table where everything except read group has been collapsed
|
||||||
} else if( covariate == 1 ) {
|
}
|
||||||
|
else if (covariate == 1) {
|
||||||
return dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
|
return dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
return dataCollapsedByCovariate.get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
|
return dataCollapsedByCovariate.get(covariate - 2); // Table where everything except read group, quality score, and given covariate has been collapsed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
* Section of code shared between the two recalibration walkers which uses the command line arguments to adjust attributes of the read such as quals or platform string
|
||||||
|
*
|
||||||
* @param read The read to adjust
|
* @param read The read to adjust
|
||||||
* @param RAC The list of shared command line arguments
|
* @param RAC The list of shared command line arguments
|
||||||
*/
|
*/
|
||||||
public static void parseSAMRecord( final SAMRecord read, final RecalibrationArgumentCollection RAC ) {
|
public static void parseSAMRecord(final GATKSAMRecord read, final RecalibrationArgumentCollection RAC) {
|
||||||
GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
|
GATKSAMReadGroupRecord readGroup = ((GATKSAMRecord) read).getReadGroup();
|
||||||
|
|
||||||
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
|
// If there are no read groups we have to default to something, and that something could be specified by the user using command line arguments
|
||||||
|
|
@ -244,7 +269,8 @@ public class RecalDataManager {
|
||||||
readGroup = new GATKSAMReadGroupRecord(RAC.DEFAULT_READ_GROUP);
|
readGroup = new GATKSAMReadGroupRecord(RAC.DEFAULT_READ_GROUP);
|
||||||
readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
|
readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
|
||||||
((GATKSAMRecord) read).setReadGroup(readGroup);
|
((GATKSAMRecord) read).setReadGroup(readGroup);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName());
|
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no read group. First observed at read with name = " + read.getReadName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -269,7 +295,8 @@ public class RecalDataManager {
|
||||||
warnUserNullPlatform = true;
|
warnUserNullPlatform = true;
|
||||||
}
|
}
|
||||||
readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
|
readGroup.setPlatform(RAC.DEFAULT_PLATFORM);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName());
|
throw new UserException.MalformedBAM(read, "The input .bam file contains reads with no platform information. First observed at read with name = " + read.getReadName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -277,9 +304,10 @@ public class RecalDataManager {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space
|
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space
|
||||||
|
*
|
||||||
* @param read The SAMRecord to parse
|
* @param read The SAMRecord to parse
|
||||||
*/
|
*/
|
||||||
public static void parseColorSpace( final SAMRecord read ) {
|
public static void parseColorSpace(final GATKSAMRecord read) {
|
||||||
|
|
||||||
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
|
||||||
if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
|
if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
|
||||||
|
|
@ -289,7 +317,8 @@ public class RecalDataManager {
|
||||||
byte[] colorSpace;
|
byte[] colorSpace;
|
||||||
if (attr instanceof String) {
|
if (attr instanceof String) {
|
||||||
colorSpace = ((String) attr).getBytes();
|
colorSpace = ((String) attr).getBytes();
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
throw new UserException.MalformedBAM(read, String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -308,7 +337,8 @@ public class RecalDataManager {
|
||||||
}
|
}
|
||||||
read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
read.setAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency);
|
||||||
|
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
||||||
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||||
}
|
}
|
||||||
|
|
@ -319,20 +349,22 @@ public class RecalDataManager {
|
||||||
/**
|
/**
|
||||||
* Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
|
* Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
|
||||||
* This method doesn't add the inconsistent tag to the read like parseColorSpace does
|
* This method doesn't add the inconsistent tag to the read like parseColorSpace does
|
||||||
|
*
|
||||||
* @param read The SAMRecord to parse
|
* @param read The SAMRecord to parse
|
||||||
* @param originalQualScores The array of original quality scores to modify during the correction
|
* @param originalQualScores The array of original quality scores to modify during the correction
|
||||||
* @param solidRecalMode Which mode of solid recalibration to apply
|
* @param solidRecalMode Which mode of solid recalibration to apply
|
||||||
* @param refBases The reference for this read
|
* @param refBases The reference for this read
|
||||||
* @return A new array of quality scores that have been ref bias corrected
|
* @return A new array of quality scores that have been ref bias corrected
|
||||||
*/
|
*/
|
||||||
public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases ) {
|
public static byte[] calcColorSpace(final GATKSAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final byte[] refBases) {
|
||||||
|
|
||||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
byte[] colorSpace;
|
byte[] colorSpace;
|
||||||
if (attr instanceof String) {
|
if (attr instanceof String) {
|
||||||
colorSpace = ((String) attr).getBytes();
|
colorSpace = ((String) attr).getBytes();
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -357,14 +389,17 @@ public class RecalDataManager {
|
||||||
if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0
|
if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO) { // Set inconsistent bases and the one before it to Q0
|
||||||
final boolean setBaseN = false;
|
final boolean setBaseN = false;
|
||||||
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
||||||
} else if( solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N ) {
|
}
|
||||||
|
else if (solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N) {
|
||||||
final boolean setBaseN = true;
|
final boolean setBaseN = true;
|
||||||
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
||||||
} else if( solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
|
}
|
||||||
|
else if (solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
|
||||||
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead);
|
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
||||||
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||||
}
|
}
|
||||||
|
|
@ -372,14 +407,15 @@ public class RecalDataManager {
|
||||||
return originalQualScores;
|
return originalQualScores;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean checkNoCallColorSpace( final SAMRecord read ) {
|
public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
|
||||||
if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
|
if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
|
||||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
byte[] colorSpace;
|
byte[] colorSpace;
|
||||||
if (attr instanceof String) {
|
if (attr instanceof String) {
|
||||||
colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel
|
colorSpace = ((String) attr).substring(1).getBytes(); // trim off the Sentinel
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -389,7 +425,8 @@ public class RecalDataManager {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
throw new UserException.MalformedBAM(read, "Unable to find color space information in SOLiD read. First observed at read with name = " + read.getReadName() +
|
||||||
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
" Unfortunately this .bam file can not be recalibrated without color space information because of potential reference bias.");
|
||||||
}
|
}
|
||||||
|
|
@ -400,6 +437,7 @@ public class RecalDataManager {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero
|
* Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero
|
||||||
|
*
|
||||||
* @param read The SAMRecord to recalibrate
|
* @param read The SAMRecord to recalibrate
|
||||||
* @param readBases The bases in the read which have been RC'd if necessary
|
* @param readBases The bases in the read which have been RC'd if necessary
|
||||||
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
||||||
|
|
@ -408,22 +446,33 @@ public class RecalDataManager {
|
||||||
* @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar
|
* @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar
|
||||||
* @return The byte array of original quality scores some of which might have been set to zero
|
* @return The byte array of original quality scores some of which might have been set to zero
|
||||||
*/
|
*/
|
||||||
private static byte[] solidRecalSetToQZero( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores,
|
private static byte[] solidRecalSetToQZero(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores, final byte[] refBases, final boolean setBaseN) {
|
||||||
final byte[] refBases, final boolean setBaseN ) {
|
|
||||||
|
|
||||||
final boolean negStrand = read.getReadNegativeStrandFlag();
|
final boolean negStrand = read.getReadNegativeStrandFlag();
|
||||||
for (int iii = 1; iii < originalQualScores.length; iii++) {
|
for (int iii = 1; iii < originalQualScores.length; iii++) {
|
||||||
if (inconsistency[iii] == 1) {
|
if (inconsistency[iii] == 1) {
|
||||||
if (readBases[iii] == refBases[iii]) {
|
if (readBases[iii] == refBases[iii]) {
|
||||||
if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; }
|
if (negStrand) {
|
||||||
else { originalQualScores[iii] = (byte)0; }
|
originalQualScores[originalQualScores.length - (iii + 1)] = (byte) 0;
|
||||||
if( setBaseN ) { readBases[iii] = (byte)'N'; }
|
}
|
||||||
|
else {
|
||||||
|
originalQualScores[iii] = (byte) 0;
|
||||||
|
}
|
||||||
|
if (setBaseN) {
|
||||||
|
readBases[iii] = (byte) 'N';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Set the prev base to Q0 as well
|
// Set the prev base to Q0 as well
|
||||||
if (readBases[iii - 1] == refBases[iii - 1]) {
|
if (readBases[iii - 1] == refBases[iii - 1]) {
|
||||||
if( negStrand ) { originalQualScores[originalQualScores.length-iii] = (byte)0; }
|
if (negStrand) {
|
||||||
else { originalQualScores[iii-1] = (byte)0; }
|
originalQualScores[originalQualScores.length - iii] = (byte) 0;
|
||||||
if( setBaseN ) { readBases[iii-1] = (byte)'N'; }
|
}
|
||||||
|
else {
|
||||||
|
originalQualScores[iii - 1] = (byte) 0;
|
||||||
|
}
|
||||||
|
if (setBaseN) {
|
||||||
|
readBases[iii - 1] = (byte) 'N';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -437,14 +486,14 @@ public class RecalDataManager {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Peform the REMOVE_REF_BIAS solid recalibration. Look at the color space qualities and probabilistically decide if the base should be change to match the color or left as reference
|
* Peform the REMOVE_REF_BIAS solid recalibration. Look at the color space qualities and probabilistically decide if the base should be change to match the color or left as reference
|
||||||
|
*
|
||||||
* @param read The SAMRecord to recalibrate
|
* @param read The SAMRecord to recalibrate
|
||||||
* @param readBases The bases in the read which have been RC'd if necessary
|
* @param readBases The bases in the read which have been RC'd if necessary
|
||||||
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
||||||
* @param colorImpliedBases The bases implied by the color space, RC'd if necessary
|
* @param colorImpliedBases The bases implied by the color space, RC'd if necessary
|
||||||
* @param refBases The reference which has been RC'd if necessary
|
* @param refBases The reference which has been RC'd if necessary
|
||||||
*/
|
*/
|
||||||
private static void solidRecalRemoveRefBias( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases,
|
private static void solidRecalRemoveRefBias(final GATKSAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases, final byte[] refBases) {
|
||||||
final byte[] refBases) {
|
|
||||||
|
|
||||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
|
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
|
|
@ -453,7 +502,8 @@ public class RecalDataManager {
|
||||||
String x = (String) attr;
|
String x = (String) attr;
|
||||||
colorSpaceQuals = x.getBytes();
|
colorSpaceQuals = x.getBytes();
|
||||||
SAMUtils.fastqToPhred(colorSpaceQuals);
|
SAMUtils.fastqToPhred(colorSpaceQuals);
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
throw new ReviewedStingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG, read.getReadName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -467,7 +517,8 @@ public class RecalDataManager {
|
||||||
if (rand == 0) { // The color implied base won the coin flip
|
if (rand == 0) { // The color implied base won the coin flip
|
||||||
readBases[jjj] = colorImpliedBases[jjj];
|
readBases[jjj] = colorImpliedBases[jjj];
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else {
|
||||||
final int maxQuality = Math.max((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
|
final int maxQuality = Math.max((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
|
||||||
final int minQuality = Math.min((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
|
final int minQuality = Math.min((int) colorSpaceQuals[jjj], (int) colorSpaceQuals[jjj + 1]);
|
||||||
int diffInQuality = maxQuality - minQuality;
|
int diffInQuality = maxQuality - minQuality;
|
||||||
|
|
@ -482,7 +533,8 @@ public class RecalDataManager {
|
||||||
if (maxQuality == (int) colorSpaceQuals[jjj]) {
|
if (maxQuality == (int) colorSpaceQuals[jjj]) {
|
||||||
readBases[jjj] = colorImpliedBases[jjj];
|
readBases[jjj] = colorImpliedBases[jjj];
|
||||||
} // else ref color had higher q score, and won out, so nothing to do here
|
} // else ref color had higher q score, and won out, so nothing to do here
|
||||||
} else { // lower q score won
|
}
|
||||||
|
else { // lower q score won
|
||||||
if (minQuality == (int) colorSpaceQuals[jjj]) {
|
if (minQuality == (int) colorSpaceQuals[jjj]) {
|
||||||
readBases[jjj] = colorImpliedBases[jjj];
|
readBases[jjj] = colorImpliedBases[jjj];
|
||||||
} // else ref color had lower q score, and won out, so nothing to do here
|
} // else ref color had lower q score, and won out, so nothing to do here
|
||||||
|
|
@ -498,18 +550,20 @@ public class RecalDataManager {
|
||||||
readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read
|
readBases = BaseUtils.simpleReverseComplement(readBases.clone()); // Put the bases back in reverse order to stuff them back in the read
|
||||||
}
|
}
|
||||||
read.setReadBases(readBases);
|
read.setReadBases(readBases);
|
||||||
} else { // No color space quality tag in file
|
}
|
||||||
|
else { // No color space quality tag in file
|
||||||
throw new UserException.MalformedBAM(read, "REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName());
|
throw new UserException.MalformedBAM(read, "REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given the base and the color calculate the next base in the sequence
|
* Given the base and the color calculate the next base in the sequence
|
||||||
|
*
|
||||||
* @param prevBase The base
|
* @param prevBase The base
|
||||||
* @param color The color
|
* @param color The color
|
||||||
* @return The next base in the sequence
|
* @return The next base in the sequence
|
||||||
*/
|
*/
|
||||||
private static byte getNextBaseFromColor( SAMRecord read, final byte prevBase, final byte color ) {
|
private static byte getNextBaseFromColor(GATKSAMRecord read, final byte prevBase, final byte color) {
|
||||||
switch (color) {
|
switch (color) {
|
||||||
case '0':
|
case '0':
|
||||||
return prevBase;
|
return prevBase;
|
||||||
|
|
@ -527,18 +581,20 @@ public class RecalDataManager {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if this base is inconsistent with its color space. If it is then SOLID inserted the reference here and we should reduce the quality
|
* Check if this base is inconsistent with its color space. If it is then SOLID inserted the reference here and we should reduce the quality
|
||||||
|
*
|
||||||
* @param read The read which contains the color space to check against
|
* @param read The read which contains the color space to check against
|
||||||
* @param offset The offset in the read at which to check
|
* @param offset The offset in the read at which to check
|
||||||
* @return Returns true if the base was inconsistent with the color space
|
* @return Returns true if the base was inconsistent with the color space
|
||||||
*/
|
*/
|
||||||
public static boolean isInconsistentColorSpace( final SAMRecord read, final int offset ) {
|
public static boolean isInconsistentColorSpace(final GATKSAMRecord read, final int offset) {
|
||||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
|
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG);
|
||||||
if (attr != null) {
|
if (attr != null) {
|
||||||
final byte[] inconsistency = (byte[]) attr;
|
final byte[] inconsistency = (byte[]) attr;
|
||||||
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
|
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
|
||||||
if (read.getReadNegativeStrandFlag()) { // Negative direction
|
if (read.getReadNegativeStrandFlag()) { // Negative direction
|
||||||
return inconsistency[inconsistency.length - offset - 1] != (byte) 0;
|
return inconsistency[inconsistency.length - offset - 1] != (byte) 0;
|
||||||
} else { // Forward direction
|
}
|
||||||
|
else { // Forward direction
|
||||||
return inconsistency[offset] != (byte) 0;
|
return inconsistency[offset] != (byte) 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -557,7 +613,8 @@ public class RecalDataManager {
|
||||||
// }
|
// }
|
||||||
//}
|
//}
|
||||||
|
|
||||||
} else { // No inconsistency array, so nothing is inconsistent
|
}
|
||||||
|
else { // No inconsistency array, so nothing is inconsistent
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -574,8 +631,7 @@ public class RecalDataManager {
|
||||||
*/
|
*/
|
||||||
public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType) {
|
public static Comparable[][] computeCovariates(final GATKSAMRecord gatkRead, final List<Covariate> requestedCovariates, final BaseRecalibration.BaseRecalibrationType modelType) {
|
||||||
//compute all covariates for this read
|
//compute all covariates for this read
|
||||||
final List<Covariate> requestedCovariatesRef = requestedCovariates;
|
final int numRequestedCovariates = requestedCovariates.size();
|
||||||
final int numRequestedCovariates = requestedCovariatesRef.size();
|
|
||||||
final int readLength = gatkRead.getReadLength();
|
final int readLength = gatkRead.getReadLength();
|
||||||
|
|
||||||
final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
|
final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
|
||||||
|
|
@ -583,7 +639,7 @@ public class RecalDataManager {
|
||||||
|
|
||||||
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
|
||||||
for (int i = 0; i < numRequestedCovariates; i++) {
|
for (int i = 0; i < numRequestedCovariates; i++) {
|
||||||
requestedCovariatesRef.get(i).getValues( gatkRead, tempCovariateValuesHolder, modelType );
|
requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
|
||||||
for (int j = 0; j < readLength; j++) {
|
for (int j = 0; j < readLength; j++) {
|
||||||
//copy values into a 2D array that allows all covar types to be extracted at once for
|
//copy values into a 2D array that allows all covar types to be extracted at once for
|
||||||
//an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
//an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
|
||||||
|
|
@ -603,14 +659,19 @@ public class RecalDataManager {
|
||||||
private static byte performColorOne(byte base) {
|
private static byte performColorOne(byte base) {
|
||||||
switch (base) {
|
switch (base) {
|
||||||
case 'A':
|
case 'A':
|
||||||
case 'a': return 'C';
|
case 'a':
|
||||||
|
return 'C';
|
||||||
case 'C':
|
case 'C':
|
||||||
case 'c': return 'A';
|
case 'c':
|
||||||
|
return 'A';
|
||||||
case 'G':
|
case 'G':
|
||||||
case 'g': return 'T';
|
case 'g':
|
||||||
|
return 'T';
|
||||||
case 'T':
|
case 'T':
|
||||||
case 't': return 'G';
|
case 't':
|
||||||
default: return base;
|
return 'G';
|
||||||
|
default:
|
||||||
|
return base;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -623,14 +684,19 @@ public class RecalDataManager {
|
||||||
private static byte performColorTwo(byte base) {
|
private static byte performColorTwo(byte base) {
|
||||||
switch (base) {
|
switch (base) {
|
||||||
case 'A':
|
case 'A':
|
||||||
case 'a': return 'G';
|
case 'a':
|
||||||
|
return 'G';
|
||||||
case 'C':
|
case 'C':
|
||||||
case 'c': return 'T';
|
case 'c':
|
||||||
|
return 'T';
|
||||||
case 'G':
|
case 'G':
|
||||||
case 'g': return 'A';
|
case 'g':
|
||||||
|
return 'A';
|
||||||
case 'T':
|
case 'T':
|
||||||
case 't': return 'C';
|
case 't':
|
||||||
default: return base;
|
return 'C';
|
||||||
|
default:
|
||||||
|
return base;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -643,14 +709,19 @@ public class RecalDataManager {
|
||||||
private static byte performColorThree(byte base) {
|
private static byte performColorThree(byte base) {
|
||||||
switch (base) {
|
switch (base) {
|
||||||
case 'A':
|
case 'A':
|
||||||
case 'a': return 'T';
|
case 'a':
|
||||||
|
return 'T';
|
||||||
case 'C':
|
case 'C':
|
||||||
case 'c': return 'G';
|
case 'c':
|
||||||
|
return 'G';
|
||||||
case 'G':
|
case 'G':
|
||||||
case 'g': return 'C';
|
case 'g':
|
||||||
|
return 'C';
|
||||||
case 'T':
|
case 'T':
|
||||||
case 't': return 'A';
|
case 't':
|
||||||
default: return base;
|
return 'A';
|
||||||
|
default:
|
||||||
|
return base;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -82,4 +82,11 @@ public class RecalibrationArgumentCollection {
|
||||||
*/
|
*/
|
||||||
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
|
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
|
||||||
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
|
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The context covariate will use a context of this size to calculate it's covariate value
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "context_size", shortName = "cs", doc = "size of the k-mer context to be used", required = false)
|
||||||
|
int CONTEXT_SIZE = 8;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue