Added method to AlignmentUtils which takes a read's cigar and the refBases char array given to a ReadWalker and returns the aligned reference char array. Bug fix in solid_recal_modes to use this aligned reference array. Recalibrator version number is no longer separate for each of the two walkers.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2589 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2a116bb5d6
commit
70df30fc1b
|
|
@ -102,7 +102,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base
|
private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base
|
||||||
private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted.
|
private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted.
|
||||||
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
|
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
|
||||||
private static final String versionString = "v2.2.5"; // Major version, minor version, and build number
|
|
||||||
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
|
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
|
||||||
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
|
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
|
||||||
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
|
||||||
|
|
@ -121,7 +120,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
|
||||||
*/
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
logger.info( "CovariateCounterWalker version: " + versionString );
|
logger.info( "Recalibrator version: " + RecalDataManager.versionString );
|
||||||
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
||||||
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
||||||
DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS;
|
DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS;
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,8 @@ public class RecalDataManager {
|
||||||
private static boolean warnUserNullReadGroup = false;
|
private static boolean warnUserNullReadGroup = false;
|
||||||
private static boolean warnUserNoColorSpace = false;
|
private static boolean warnUserNoColorSpace = false;
|
||||||
|
|
||||||
|
public static final String versionString = "v2.2.10"; // Major version, minor version, and build number
|
||||||
|
|
||||||
RecalDataManager() {
|
RecalDataManager() {
|
||||||
data = new NestedHashMap();
|
data = new NestedHashMap();
|
||||||
dataCollapsedReadGroup = null;
|
dataCollapsedReadGroup = null;
|
||||||
|
|
@ -261,7 +263,6 @@ public class RecalDataManager {
|
||||||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read
|
// Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read
|
||||||
byte[] readBases = read.getReadBases();
|
byte[] readBases = read.getReadBases();
|
||||||
if( read.getReadNegativeStrandFlag() ) {
|
if( read.getReadNegativeStrandFlag() ) {
|
||||||
|
|
@ -310,10 +311,10 @@ public class RecalDataManager {
|
||||||
// Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read
|
// Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read
|
||||||
byte[] readBases = read.getReadBases();
|
byte[] readBases = read.getReadBases();
|
||||||
final byte[] colorImpliedBases = readBases.clone();
|
final byte[] colorImpliedBases = readBases.clone();
|
||||||
char[] refBasesDirRead = refBases;
|
char[] refBasesDirRead = AlignmentUtils.alignmentToCharArray( read.getCigar(), read.getReadString().toCharArray(), refBases ); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
|
||||||
if( read.getReadNegativeStrandFlag() ) {
|
if( read.getReadNegativeStrandFlag() ) {
|
||||||
readBases = BaseUtils.simpleReverseComplement( read.getReadBases() );
|
readBases = BaseUtils.simpleReverseComplement( read.getReadBases() );
|
||||||
refBasesDirRead = BaseUtils.simpleReverseComplement( refBases );
|
refBasesDirRead = BaseUtils.simpleReverseComplement( refBasesDirRead.clone() );
|
||||||
}
|
}
|
||||||
final int[] inconsistency = new int[readBases.length];
|
final int[] inconsistency = new int[readBases.length];
|
||||||
byte prevBase = (byte) colorSpace[0]; // The sentinel
|
byte prevBase = (byte) colorSpace[0]; // The sentinel
|
||||||
|
|
@ -324,17 +325,15 @@ public class RecalDataManager {
|
||||||
prevBase = readBases[iii];
|
prevBase = readBases[iii];
|
||||||
}
|
}
|
||||||
|
|
||||||
final boolean isMappedToReference = (refBases != null && refBases.length == inconsistency.length);
|
|
||||||
|
|
||||||
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
|
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
|
||||||
if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0
|
if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0
|
||||||
final boolean setBaseN = false;
|
final boolean setBaseN = false;
|
||||||
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN);
|
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
||||||
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) {
|
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) {
|
||||||
final boolean setBaseN = true;
|
final boolean setBaseN = true;
|
||||||
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN);
|
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
||||||
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
|
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
|
||||||
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, isMappedToReference, coinFlip);
|
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, coinFlip);
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag
|
} else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag
|
||||||
|
|
@ -353,23 +352,22 @@ public class RecalDataManager {
|
||||||
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
||||||
* @param originalQualScores The array of original quality scores to set to zero if needed
|
* @param originalQualScores The array of original quality scores to set to zero if needed
|
||||||
* @param refBases The reference which has been RC'd if necessary
|
* @param refBases The reference which has been RC'd if necessary
|
||||||
* @param isMappedToRef Is this read mapped fully to a reference
|
|
||||||
* @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar
|
* @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar
|
||||||
* @return The byte array of original quality scores some of which might have been set to zero
|
* @return The byte array of original quality scores some of which might have been set to zero
|
||||||
*/
|
*/
|
||||||
private static byte[] solidRecalSetToQZero( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores,
|
private static byte[] solidRecalSetToQZero( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores,
|
||||||
final char[] refBases, final boolean isMappedToRef, final boolean setBaseN ) {
|
final char[] refBases, final boolean setBaseN ) {
|
||||||
|
|
||||||
final boolean negStrand = read.getReadNegativeStrandFlag();
|
final boolean negStrand = read.getReadNegativeStrandFlag();
|
||||||
for( int iii = 1; iii < originalQualScores.length - 1; iii++ ) {
|
for( int iii = 1; iii < originalQualScores.length - 1; iii++ ) {
|
||||||
if( inconsistency[iii] == 1 ) {
|
if( inconsistency[iii] == 1 ) {
|
||||||
if( !isMappedToRef || (char)readBases[iii] == refBases[iii] ) {
|
if( (char)readBases[iii] == refBases[iii] ) {
|
||||||
if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; }
|
if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; }
|
||||||
else { originalQualScores[iii] = (byte)0; }
|
else { originalQualScores[iii] = (byte)0; }
|
||||||
if( setBaseN ) { readBases[iii] = (byte)'N'; }
|
if( setBaseN ) { readBases[iii] = (byte)'N'; }
|
||||||
}
|
}
|
||||||
// Set the prev base to Q0 as well
|
// Set the prev base to Q0 as well
|
||||||
if( !isMappedToRef || (char)readBases[iii-1] == refBases[iii-1] ) {
|
if( (char)readBases[iii-1] == refBases[iii-1] ) {
|
||||||
if( negStrand ) { originalQualScores[originalQualScores.length-iii] = (byte)0; }
|
if( negStrand ) { originalQualScores[originalQualScores.length-iii] = (byte)0; }
|
||||||
else { originalQualScores[iii-1] = (byte)0; }
|
else { originalQualScores[iii-1] = (byte)0; }
|
||||||
if( setBaseN ) { readBases[iii-1] = (byte)'N'; }
|
if( setBaseN ) { readBases[iii-1] = (byte)'N'; }
|
||||||
|
|
@ -390,11 +388,10 @@ public class RecalDataManager {
|
||||||
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
|
||||||
* @param colorImpliedBases The bases implied by the color space, RC'd if necessary
|
* @param colorImpliedBases The bases implied by the color space, RC'd if necessary
|
||||||
* @param refBases The reference which has been RC'd if necessary
|
* @param refBases The reference which has been RC'd if necessary
|
||||||
* @param isMappedToRef Is this read mapped fully to a reference
|
|
||||||
* @param coinFlip A random number generator
|
* @param coinFlip A random number generator
|
||||||
*/
|
*/
|
||||||
private static void solidRecalRemoveRefBias( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases,
|
private static void solidRecalRemoveRefBias( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases,
|
||||||
final char[] refBases, final boolean isMappedToRef, final Random coinFlip) {
|
final char[] refBases, final Random coinFlip) {
|
||||||
|
|
||||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
|
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
|
||||||
if( attr != null ) {
|
if( attr != null ) {
|
||||||
|
|
@ -409,7 +406,7 @@ public class RecalDataManager {
|
||||||
if( inconsistency[iii] == 1 ) {
|
if( inconsistency[iii] == 1 ) {
|
||||||
for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read
|
for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read
|
||||||
if( jjj == iii || inconsistency[jjj] == 0 ) { // Don't want to correct the previous base a second time if it was already corrected in the previous step
|
if( jjj == iii || inconsistency[jjj] == 0 ) { // Don't want to correct the previous base a second time if it was already corrected in the previous step
|
||||||
if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) {
|
if( (char)readBases[jjj] == refBases[jjj] ) {
|
||||||
if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin
|
if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin
|
||||||
final int rand = coinFlip.nextInt( 2 );
|
final int rand = coinFlip.nextInt( 2 );
|
||||||
if( rand == 0 ) { // The color implied base won the coin flip
|
if( rand == 0 ) { // The color implied base won the coin flip
|
||||||
|
|
|
||||||
|
|
@ -62,9 +62,9 @@ public class RecalibrationArgumentCollection {
|
||||||
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
|
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
|
||||||
public boolean EXCEPTION_IF_NO_TILE = false;
|
public boolean EXCEPTION_IF_NO_TILE = false;
|
||||||
|
|
||||||
|
|
||||||
public final boolean checkSolidRecalMode() {
|
public final boolean checkSolidRecalMode() {
|
||||||
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
|
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
|
||||||
SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") );
|
SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") );
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -95,7 +95,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
||||||
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
||||||
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
||||||
private static final String versionString = "v2.2.9"; // Major version, minor version, and build number
|
|
||||||
private Random coinFlip; // Random number generator is used to remove reference bias in solid bams
|
private Random coinFlip; // Random number generator is used to remove reference bias in solid bams
|
||||||
private static final long RANDOM_SEED = 1032861495;
|
private static final long RANDOM_SEED = 1032861495;
|
||||||
|
|
||||||
|
|
@ -112,7 +111,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
*/
|
*/
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
|
|
||||||
logger.info( "TableRecalibrationWalker version: " + versionString );
|
logger.info( "Recalibrator version: " + RecalDataManager.versionString );
|
||||||
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
||||||
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
||||||
coinFlip = new Random(RANDOM_SEED);
|
coinFlip = new Random(RANDOM_SEED);
|
||||||
|
|
@ -225,7 +224,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
||||||
final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
final SAMFileHeader header = getToolkit().getSAMFileHeader().clone();
|
||||||
if( !NO_PG_TAG ) {
|
if( !NO_PG_TAG ) {
|
||||||
final SAMProgramRecord programRecord = new SAMProgramRecord( "GATK TableRecalibration" );
|
final SAMProgramRecord programRecord = new SAMProgramRecord( "GATK TableRecalibration" );
|
||||||
programRecord.setProgramVersion( versionString );
|
programRecord.setProgramVersion( RecalDataManager.versionString );
|
||||||
String commandLineString = "Covariates used = [";
|
String commandLineString = "Covariates used = [";
|
||||||
for( Covariate cov : requestedCovariates ) {
|
for( Covariate cov : requestedCovariates ) {
|
||||||
commandLineString += cov.getClass().getSimpleName() + ", ";
|
commandLineString += cov.getClass().getSimpleName() + ", ";
|
||||||
|
|
|
||||||
|
|
@ -353,6 +353,41 @@ public class AlignmentUtils {
|
||||||
return refLine.toString();
|
return refLine.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static char[] alignmentToCharArray( final Cigar cigar, final char[] read, final char[] ref ) {
|
||||||
|
|
||||||
|
final char[] alignment = new char[read.length];
|
||||||
|
int refPos = 0;
|
||||||
|
int alignPos = 0;
|
||||||
|
|
||||||
|
for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) {
|
||||||
|
|
||||||
|
final CigarElement ce = cigar.getCigarElement(iii);
|
||||||
|
|
||||||
|
switch( ce.getOperator() ) {
|
||||||
|
case I:
|
||||||
|
case S:
|
||||||
|
for ( int jjj = 0 ; jjj < ce.getLength(); jjj++ ) {
|
||||||
|
alignment[alignPos++] = '+';
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case D:
|
||||||
|
case N:
|
||||||
|
refPos++;
|
||||||
|
break;
|
||||||
|
case M:
|
||||||
|
for ( int jjj = 0 ; jjj < ce.getLength(); jjj++ ) {
|
||||||
|
alignment[alignPos] = ref[refPos];
|
||||||
|
alignPos++;
|
||||||
|
refPos++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new StingException( "Unsupported cigar operator: " + ce.getOperator() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return alignment;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format
|
* Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format
|
||||||
* specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and
|
* specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue