Updating the two solid_recal_mode options to also change the previous base since solid aligner prefers single color mismatch alignments over true SNP alignments. COUNT_AS_MISMATCH mode has been removed completely. The default mode is now SET_Q_ZERO.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2394 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2009-12-17 20:07:26 +00:00
parent 07f1859290
commit 6fbf77be95
6 changed files with 222 additions and 108 deletions

View File

@ -103,7 +103,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
private long solidInsertedReferenceBases = 0; // Number of bases where we believe SOLID has inserted the reference because the color space is inconsistent with the read base
private long otherColorSpaceInconsistency = 0; // Number of bases where the color space is inconsistent with the read but the reference wasn't inserted. BUGBUG: I don't understand what is going on in this case
private int numUnprocessed = 0; // Number of consecutive loci skipped because we are only processing every Nth site
private static final String versionString = "v2.1.1"; // Major version, minor version, and build number
private static final String versionString = "v2.1.2"; // Major version, minor version, and build number
private Pair<Long, Long> dbSNP_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for dbSNP loci
private Pair<Long, Long> novel_counts = new Pair<Long, Long>(0L, 0L); // mismatch/base counts for non-dbSNP loci
private static final double DBSNP_VS_NOVEL_MISMATCH_RATE = 2.0; // rate at which dbSNP sites (on an individual level) mismatch relative to novel sites (determined by looking at NA12878)
@ -127,7 +127,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS;
if( !RAC.checkSolidRecalMode() ) {
throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, COUNT_AS_MISMATCH, or REMOVE_REF_BIAS");
throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS");
}
// Get a list of all available covariates
@ -300,10 +300,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
} else {
otherColorSpaceInconsistency++;
}
if( RAC.SOLID_RECAL_MODE.equalsIgnoreCase("COUNT_AS_MISMATCH") ) {
refBase = (byte)'X'; // This will always mismatch since we are already filtering out reads that don't pass BaseUtils.isRegularBase
updateDataFromRead( read, offset, refBase );
}
}
}
}
@ -313,7 +309,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> {
} else { // We skipped over the dbSNP site, and we are only processing every Nth locus
skippedSites++;
if( isSNP ) {
updateMismatchCounts(dbSNP_counts, context, ref.getBase());// For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable
updateMismatchCounts(dbSNP_counts, context, ref.getBase()); // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable
}
}

View File

@ -42,6 +42,7 @@ import net.sf.samtools.SAMReadGroupRecord;
* Date: Nov 6, 2009
*
* This helper class holds the data HashMap as well as submaps that represent the marginal distributions collapsed over all needed dimensions.
* It also has static methods that are used to perform the various solid recalibration modes that attempt to correct the reference bias.
*/
public class RecalDataManager {
@ -58,7 +59,7 @@ public class RecalDataManager {
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which if this base is inconsistent with its color
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullReadGroup = false;
private static boolean warnUserNoColorSpace = false;
@ -88,21 +89,27 @@ public class RecalDataManager {
* Add the given mapping to all of the collapsed hash tables
* @param key The list of comparables that is the key for this mapping
* @param fullDatum The RecalDatum which is the data for this mapping
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
*/
public final void addToAllTables( final List<? extends Comparable> key, final RecalDatum fullDatum ) {
public final void addToAllTables( final List<? extends Comparable> key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN ) {
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
//data.put(key, thisDatum); // add the mapping to the main table
int qualityScore = Integer.parseInt( key.get(1).toString() );
ArrayList<Comparable> newKey;
RecalDatum collapsedDatum;
// Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
//BUGBUG: Bases with Reported Quality less than -pQ argument shouldn't be included in this collapsed table
ArrayList<Comparable> newKey = new ArrayList<Comparable>();
newKey.add( key.get(0) ); // Make a new key with just the read group
RecalDatum collapsedDatum = dataCollapsedReadGroup.get( newKey );
if( collapsedDatum == null ) {
dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) );
} else {
collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported
if( qualityScore >= PRESERVE_QSCORES_LESS_THAN ) {
newKey = new ArrayList<Comparable>();
newKey.add( key.get(0) ); // Make a new key with just the read group
collapsedDatum = dataCollapsedReadGroup.get( newKey );
if( collapsedDatum == null ) {
dataCollapsedReadGroup.put( newKey, new RecalDatum(fullDatum) );
} else {
collapsedDatum.combine( fullDatum ); // using combine instead of increment in order to calculate overall aggregateQReported
}
}
newKey = new ArrayList<Comparable>();
@ -248,6 +255,10 @@ public class RecalDataManager {
}
}
/**
* Parse through the color space of the read and add a new tag to the SAMRecord that says which bases are inconsistent with the color space
* @param read The SAMRecord to parse
*/
public static void parseColorSpace( final SAMRecord read ) {
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
if( read.getReadGroup().getPlatform().equalsIgnoreCase("SOLID") ) {
@ -265,19 +276,9 @@ public class RecalDataManager {
for( iii = 0; iii < readBases.length; iii++ ) {
byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] );
inconsistency[iii] = ( thisBase == readBases[iii] ? 0 : 1 );
prevBase = ( thisBase == readBases[iii] ? thisBase : readBases[iii] );
//System.out.print((char)thisBase);
prevBase = readBases[iii];
}
//System.out.println();
read.setAttribute( RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG, inconsistency );
//for( iii = 0; iii < readBases.length; iii++ ) {
// System.out.print((char)readBases[iii]);
//}
//System.out.println();
//for( iii = 0; iii < readBases.length; iii++ ) {
// System.out.print(inconsistency[iii]);
//}
//System.out.println("\n");
} else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag
Utils.warnUser("Unable to find color space information in SOLID read. First observed at read with name = " + read.getReadName());
@ -288,77 +289,48 @@ public class RecalDataManager {
}
}
public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, String SOLID_RECAL_MODE, Random coinFlip ) {
/**
* Parse through the color space of the read and apply the desired --solid_recal_mode correction to the bases
* This method doesn't add the inconsistent tag to the read like parseColorSpace does
* @param read The SAMRecord to parse
* @param originalQualScores The array of original quality scores to modify during the correction
* @param SOLID_RECAL_MODE Which mode of solid recalibration to apply
* @param coinFlip A random number generator
* @param refBases The reference for this read
* @return A new array of quality scores that have been ref bias corrected
*/
public static byte[] calcColorSpace( SAMRecord read, byte[] originalQualScores, final String SOLID_RECAL_MODE, final Random coinFlip, final char[] refBases ) {
int[] inconsistency = null;
if( read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG) != null ) {
char[] colorSpace = ((String)read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG)).toCharArray();
// Loop over the read and calculate first the infered bases from the color and then check if it is consistent with the read
byte[] readBases = read.getReadBases();
byte[] colorImpliedBases = readBases.clone();
char[] refBasesDirRead = refBases;
if( read.getReadNegativeStrandFlag() ) {
readBases = BaseUtils.simpleReverseComplement( read.getReadBases() );
refBasesDirRead = BaseUtils.simpleReverseComplement( refBases );
}
inconsistency = new int[readBases.length];
int iii;
int[] inconsistency = new int[readBases.length];
byte prevBase = (byte) colorSpace[0]; // The sentinel
for( iii = 0; iii < readBases.length; iii++ ) {
for( int iii = 0; iii < readBases.length; iii++ ) {
byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] );
colorImpliedBases[iii] = thisBase;
inconsistency[iii] = ( thisBase == readBases[iii] ? 0 : 1 );
prevBase = ( thisBase == readBases[iii] ? thisBase : readBases[iii] );
//System.out.print((char)thisBase);
prevBase = readBases[iii];
}
boolean isMappedToReference = (refBases != null && refBases.length == inconsistency.length);
//System.out.println();
//for( iii = 0; iii < readBases.length; iii++ ) {
// System.out.print((char)readBases[iii]);
//}
//System.out.println();
//for( iii = 0; iii < readBases.length; iii++ ) {
// System.out.print(inconsistency[iii]);
//}
//System.out.println("\n");
if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) {
for( iii = 0; iii < originalQualScores.length; iii++ ) {
if( inconsistency[iii] == 1 ) {
originalQualScores[iii] = (byte)0;
}
}
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0
boolean setBaseN = false;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN);
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) {
byte[] tmpBases = read.getReadBases();
for( iii = 0; iii < originalQualScores.length; iii++ ) {
if( inconsistency[iii] == 1 ) {
originalQualScores[iii] = (byte)0;
tmpBases[iii] = (byte)'N';
}
}
read.setReadBases( tmpBases );
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) {
if( read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG) != null ) { // BUGBUG: Warn user if using this mode but there are no color space quality scores in the bam
byte[] colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
byte[] tmpBases = read.getReadBases();
if( read.getReadNegativeStrandFlag() ) {
colorImpliedBases = BaseUtils.simpleComplement(colorImpliedBases.clone());
}
for( iii = 0; iii < inconsistency.length - 1; iii++ ) {
if( inconsistency[iii] == 1 ) {
if( colorSpaceQuals[iii] > colorSpaceQuals[iii+1] ) { // reference was inserted here, but there is more evidence for the color implied base
tmpBases[iii] = colorImpliedBases[iii];
} else if( colorSpaceQuals[iii] == colorSpaceQuals[iii+1] ) { // equal evidence for the color implied base and the reference base, so flip a coin
int rand = coinFlip.nextInt( 2 );
if( rand == 0 ) { // the color implied base won the coin flip
tmpBases[iii] = colorImpliedBases[iii];
}
}
}
}
read.setReadBases( tmpBases );
}
boolean setBaseN = true;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, isMappedToReference, setBaseN);
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBases, isMappedToReference, coinFlip);
}
} else if ( !warnUserNoColorSpace ) { // Warn the user if we can't find the color space tag
@ -369,6 +341,107 @@ public class RecalDataManager {
return originalQualScores;
}
/**
* Perform the SET_Q_ZERO solid recalibration. Inconsistent color space bases and their previous base are set to quality zero
* @param read The SAMRecord to recalibrate
* @param readBases The bases in the read RC'd if necessary
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
* @param originalQualScores The array of original quality scores to set to zero if needed
* @param refBases The reference which has been RC'd if necessary
* @param isMappedToRef Is this read mapped fully to a reference
* @param setBaseN Should we also set the base to N as well as quality zero in order to visualize in IGV or something similar
* @return The byte array of original quality scores some of which might have been set to zero
*/
private static byte[] solidRecalSetToQZero( SAMRecord read, byte[] readBases, int[] inconsistency, byte[] originalQualScores,
final char[] refBases, final boolean isMappedToRef, final boolean setBaseN ) {
for( int iii = 1; iii < originalQualScores.length - 1; iii++ ) { // BUGBUG: This relies on the fact that in SOLID bams the first and last base are set to Q0 and aren't recalibrated anyway
if( inconsistency[iii] == 1 ) {
if( !isMappedToRef || (char)readBases[iii] == refBases[iii] ) {
originalQualScores[iii] = (byte)0;
if( setBaseN ) { readBases[iii] = (byte)'N'; }
}
// Set the prev base to Q0 as well
if( !isMappedToRef || (char)readBases[iii-1] == refBases[iii-1] ) {
originalQualScores[iii-1] = (byte)0;
if( setBaseN ) { readBases[iii-1] = (byte)'N'; }
}
if( !isMappedToRef || (char)readBases[iii+1] == refBases[iii+1] ) {
originalQualScores[iii+1] = (byte)0;
if( setBaseN ) { readBases[iii+1] = (byte)'N'; }
}
}
}
if( read.getReadNegativeStrandFlag() ) {
readBases = BaseUtils.simpleReverseComplement( readBases.clone() ); // Put the bases back in reverse order to stuff them back in the read
}
read.setReadBases( readBases );
return originalQualScores;
}
/**
* Peform the REMOVE_REF_BIAS solid recalibration. Look at the color space qualities and probabilistically decide if the base should be change to match the color or left as reference
* @param read The SAMRecord to recalibrate
* @param readBases The bases in the read RC'd if necessary
* @param inconsistency The array of 1/0 that says if this base is inconsistent with its color
* @param colorImpliedBases The bases implied by the color space, RC'd if necessary
* @param refBases The reference which has been RC'd if necessary
* @param isMappedToRef Is this read mapped fully to a reference
* @param coinFlip A random number generator
*/
private static void solidRecalRemoveRefBias( SAMRecord read, byte[] readBases, int[] inconsistency, byte[] colorImpliedBases,
final char[] refBases, final boolean isMappedToRef, final Random coinFlip ) {
if( read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG) != null ) {
byte[] colorSpaceQuals = QualityUtils.fastqToPhred((String)read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG));
for( int iii = 1; iii < inconsistency.length - 2; iii++ ) {
if( inconsistency[iii] == 1 ) {
for( int jjj = iii - 1; jjj <= iii + 1; jjj++ ) { // Correct this base and the one before it along the direction of the read
if( !isMappedToRef || (char)readBases[jjj] == refBases[jjj] ) {
if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin
int rand = coinFlip.nextInt( 2 );
if( rand == 0 ) { // The color implied base won the coin flip
readBases[jjj] = colorImpliedBases[jjj];
}
} else {
int maxQuality = Math.max((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]);
int minQuality = Math.min((int)colorSpaceQuals[jjj], (int)colorSpaceQuals[jjj+1]);
int diffInQuality = maxQuality - minQuality;
int numLow = minQuality;
if(numLow == 0) { numLow = 1; }
int numHigh = Math.round(numLow * (float)Math.pow(10.0f, (float) diffInQuality / 10.0f) ); // The color with higher quality is exponentially more likely
int rand = coinFlip.nextInt( numLow + numHigh );
if( rand >= numLow ) {
if( maxQuality == (int)colorSpaceQuals[jjj] ) {
readBases[jjj] = colorImpliedBases[jjj];
}
} else {
if( minQuality == (int)colorSpaceQuals[jjj] ) {
readBases[jjj] = colorImpliedBases[jjj];
}
}
}
}
}
}
}
if( read.getReadNegativeStrandFlag() ) {
readBases = BaseUtils.simpleReverseComplement( readBases.clone() ); // Put the bases back in reverse order to stuff them back in the read
}
read.setReadBases( readBases );
} else { // No color space quality tag in file
throw new StingException("REMOVE_REF_BIAS recal mode requires color space qualities but they can't be found for read: " + read.getReadName());
}
}
/**
* Given the base and the color calculate the next base in the sequence
* @param prevBase The base
* @param color The color
* @return The next base in the sequence
*/
private static char getNextBaseFromColor( final char prevBase, final char color ) {
switch(color) {
case '0': // same base

View File

@ -53,8 +53,8 @@ public class RecalibrationArgumentCollection {
public String FORCE_READ_GROUP = null;
@Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
@Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? EXPERIMENTAL OPTION. USE AT OWN RISK. Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, COUNT_AS_MISMATCH, or REMOVE_REF_BIAS")
public String SOLID_RECAL_MODE = "DO_NOTHING";
@Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
public String SOLID_RECAL_MODE = "SET_Q_ZERO";
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="The window size used by MinimumNQSCovariate for its calculation", required=false)
public int WINDOW_SIZE = 5;
@Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
@ -62,6 +62,6 @@ public class RecalibrationArgumentCollection {
public boolean checkSolidRecalMode() {
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("COUNT_AS_MISMATCH") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") );
SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") );
}
}

View File

@ -58,8 +58,7 @@ import java.lang.reflect.Field;
*/
@WalkerName("TableRecalibration")
@Requires({ DataSource.READS, DataSource.REFERENCE }) // This walker requires -I input.bam, it also requires -R reference.fasta, but by not saying @requires REFERENCE_BASES I'm telling the
// GATK to not actually spend time giving me the refBase array since I don't use it
@Requires({ DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES }) // This walker requires -I input.bam, it also requires -R reference.fasta
public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/////////////////////////////
@ -94,9 +93,10 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
private static final String versionString = "v2.1.1"; // Major version, minor version, and build number
private static final String versionString = "v2.1.2"; // Major version, minor version, and build number
private SAMFileWriter OUTPUT_BAM = null;// The File Writer that will write out the recalibrated bam
private Random coinFlip; // Random number generator is used to remove reference bias in solid bams
private static final long RANDOM_SEED = 1032861495;
//---------------------------------------------------------------------------------------------------------------
//
@ -114,9 +114,9 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
logger.info( "TableRecalibrationWalker version: " + versionString );
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
coinFlip = new Random();
coinFlip = new Random(RANDOM_SEED);
if( !RAC.checkSolidRecalMode() ) {
throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, COUNT_AS_MISMATCH, or REMOVE_REF_BIAS");
throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS");
}
// Get a list of all available covariates
@ -282,7 +282,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
// Create a new datum using the number of observations, number of mismatches, and reported quality score
RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ) );
// Add that datum to all the collapsed tables which will be used in the sequential calculation
dataManager.addToAllTables( key, datum );
dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
}
@ -300,17 +300,14 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
*/
public SAMRecord map( char[] refBases, SAMRecord read ) {
// WARNING: refBases is always null because this walker doesn't have @Requires({DataSource.REFERENCE_BASES})
// This is done in order to speed up the code
RecalDataManager.parseSAMRecord( read, RAC );
byte[] originalQuals = read.getBaseQualities();
byte[] recalQuals = originalQuals.clone();
String platform = read.getReadGroup().getPlatform();
if( platform.equalsIgnoreCase("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip );
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases );
}
int startPos = 1;

View File

@ -321,6 +321,38 @@ public class BaseUtils {
return rcbases;
}
/**
* Reverse complement a char array of bases
*
* @param bases the char array of bases
* @return the reverse complement of the char byte array
*/
static public char[] simpleReverseComplement(char[] bases) {
char[] rcbases = new char[bases.length];
for (int i = 0; i < bases.length; i++) {
rcbases[i] = simpleComplement(bases[bases.length - 1 - i]);
}
return rcbases;
}
/**
* Complement a char array of bases
*
* @param bases the char array of bases
* @return the complement of the base char array
*/
static public char[] simpleComplement(char[] bases) {
char[] rcbases = new char[bases.length];
for (int i = 0; i < bases.length; i++) {
rcbases[i] = simpleComplement(bases[i]);
}
return rcbases;
}
/**
* Reverse complement a String of bases. Preserves ambiguous bases.
*
@ -358,6 +390,22 @@ public class BaseUtils {
return rcbases;
}
/**
* Reverse an int array of bases
*
* @param bases the int array of bases
* @return the reverse of the base int array
*/
static public int[] reverse(int[] bases) {
int[] rcbases = new int[bases.length];
for (int i = 0; i < bases.length; i++) {
rcbases[i] = bases[bases.length - i - 1];
}
return rcbases;
}
/**
* Reverse (NOT reverse-complement!!) a string
*

View File

@ -17,9 +17,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
public void testCountCovariates1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "16f87c9644b27c3c3fe7a963eed45d2d" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "add8196af03d5a674aa3de702995cbbf");
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "5a4873064e602e1f945f3881c6b4a3e5");
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b039f405580ad844cd1dfd4d8ba25913" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "c730bff43ffb89dcb0774926286cac6f" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "dbbb9c49f2e0b1f117b88eb386fb6d0f" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -37,7 +37,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -cov CycleCovariate" +
" -cov DinucCovariate" +
" --sorted_output" +
" --solid_recal_mode DO_NOTHING" +
" --solid_recal_mode SET_Q_ZERO" +
" -recalFile %s",
1, // just one output file
Arrays.asList(md5));
@ -50,9 +50,9 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
public void testTableRecalibrator1() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12892.SLX.SRP000031.2009_06.selected.bam", "d7f3e0db5ed9fefc917144a0f937d50d" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "fb82046d5acfb07625d5459526d5f6c8");
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "1f1770363f381d06e8db9fead03ad7bb");
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "3fe6ee95cfa6155f8cc8e3e0a6330ad2" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "e4eae955f7d408b2af34c9224616f564" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "732267e9cd02a72b8c85238f47487509" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -68,7 +68,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
? " -L 1:10,800,000-10,810,000" : " -L 1:10,100,000-10,300,000" ) +
" -outputBam %s" +
" --no_pg_tag" +
" --solid_recal_mode DO_NOTHING" +
" --solid_recal_mode SET_Q_ZERO" +
" -recalFile " + paramsFile,
1, // just one output file
Arrays.asList(md5));
@ -80,7 +80,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesVCF() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "d0bdd1934f003406e93f935b63ab6edd");
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "bc0e62dc366f2f6ca70847a832b4aad4");
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -97,7 +97,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -cov CycleCovariate" +
" -cov DinucCovariate" +
" --sorted_output" +
" --solid_recal_mode DO_NOTHING" +
" --solid_recal_mode SET_Q_ZERO" +
" -recalFile %s",
1, // just one output file
Arrays.asList(md5));
@ -108,7 +108,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testCountCovariatesNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "fadfa35415a694781ed2b4d1b1d9535b" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "9d83a1653e076e028fecf4c2575f56e9" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -126,7 +126,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -cov DinucCovariate" +
" --default_platform illumina" +
" --sorted_output" +
" --solid_recal_mode DO_NOTHING" +
" --solid_recal_mode SET_Q_ZERO" +
" -recalFile %s",
1, // just one output file
Arrays.asList(md5));
@ -138,7 +138,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
@Test
public void testTableRecalibratorNoReadGroups() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "4879036295b567514f637d21030bf9c8" );
e.put( "/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12762.SOLID.SRP000031.2009_07.chr1.10_20mb.bam", "8853add9cfd0a8171ca917b1c6452e0a" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
String bam = entry.getKey();
@ -153,7 +153,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
" -L 1:10,100,000-10,300,000" +
" -outputBam %s" +
" --no_pg_tag" +
" --solid_recal_mode DO_NOTHING" +
" --solid_recal_mode SET_Q_ZERO" +
" --default_platform illumina" +
" -recalFile " + paramsFile,
1, // just one output file