Solid processing in base quality recalibrator now has several options for how to handle no calls in the color space. --ignore_nocall_colorspace is removed and replace by --solid_nocall_strategy. Fixed some of the @Deprecated tags in BaseUtils. LocusWalkers now filter out FailsVendorQualityCheck reads. HLA caller integration test bam file had bad vendor reads so its integration test changed.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3831 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-07-19 19:10:29 +00:00
parent 18b0114e25
commit 8e31c01680
11 changed files with 271 additions and 130 deletions

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Jul 19, 2010
*
* Filter out FailsVendorQualityCheck reads.
*/
public class FailsVendorQualityCheckReadFilter implements SamRecordFilter {
public boolean filterOut( final SAMRecord read ) {
return read.getReadFailsVendorQualityCheckFlag();
}
}

View File

@ -1,13 +1,10 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.gatk.filters.InAdaptorFilter;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorFilter;
import net.sf.picard.filter.SamRecordFilter;
@ -49,8 +46,10 @@ public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, R
SamRecordFilter filter1 = new UnmappedReadFilter();
SamRecordFilter filter2 = new NotPrimaryAlignmentReadFilter();
SamRecordFilter filter3 = new DuplicateReadFilter();
SamRecordFilter filter4 = new FailsVendorQualityCheckReadFilter();
//SamRecordFilter filter4 = new FailsVendorQualityReadFilter();
List<SamRecordFilter> x = super.getMandatoryReadFilters();
x.addAll(Arrays.asList(filter3, filter2, filter1));
x.addAll(Arrays.asList(filter4, filter3, filter2, filter1));
// }
return x;
}

View File

@ -137,9 +137,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS;
if( !RAC.checkSolidRecalMode() ) {
throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS");
}
// Get a list of all available covariates
final List<Class<? extends Covariate>> covariateClasses = PackageUtils.getClassesImplementingInterface( Covariate.class );
@ -287,7 +284,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
RecalDataManager.parseSAMRecord( gatkRead, RAC );
// Skip over reads with no calls in the color space if the user requested it
if( RAC.IGNORE_NOCALL_COLORSPACE && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) {
if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) {
gatkRead.setTemporaryAttribute( SKIP_RECORD_ATTRIBUTE, true);
continue;
}
@ -308,7 +305,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
if( BaseUtils.isRegularBase( bases[offset] ) ) {
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) {
if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
!RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) {
// This base finally passed all the checks for a good base, so add it to the big data hashmap
updateDataFromRead( gatkRead, offset, refBase );
@ -347,16 +345,16 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
*
* @param counts The counts to be updated
* @param context The AlignmentContext which holds the reads covered by this locus
* @param ref The reference base
* @param refBase The reference base
*/
private static void updateMismatchCounts(final Pair<Long, Long> counts, final AlignmentContext context, final byte ref) {
private static void updateMismatchCounts(final Pair<Long, Long> counts, final AlignmentContext context, final byte refBase) {
for( PileupElement p : context.getBasePileup() ) {
final byte readChar = p.getBase();
final int readCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(readChar);
final int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref);
final byte readBase = p.getBase();
final int readBaseIndex = BaseUtils.simpleBaseToBaseIndex(readBase);
final int refBaseIndex = BaseUtils.simpleBaseToBaseIndex(refBase);
if( readCharBaseIndex != -1 && refCharBaseIndex != -1 ) {
if( readCharBaseIndex != refCharBaseIndex ) {
if( readBaseIndex != -1 && refBaseIndex != -1 ) {
if( readBaseIndex != refBaseIndex ) {
counts.first++;
}
counts.second++;
@ -410,7 +408,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
final long curMismatches = datum.getNumMismatches();
// Add one to the number of observations and potentially one to the number of mismatches
datum.increment( (char)base, (char)refBase ); // Dangerous: If you don't cast to char then the bytes default to the (long, long) version of the increment method which is really bad
datum.incrementBaseCounts( base, refBase );
countedBases++;
novel_counts.second++;
novel_counts.first += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable

View File

@ -136,7 +136,8 @@ public class DinucCovariate implements StandardCovariate {
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
public final Comparable getValue( final String str ) {
final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
byte[] bytes = str.getBytes();
final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( bytes[0], bytes[1] ) );
if( returnDinuc.compareTo(NO_DINUC) == 0 ) {
return null;
}
@ -149,7 +150,7 @@ public class DinucCovariate implements StandardCovariate {
*
* @param array
*/
private static final void reverse(final Comparable[] array) {
private static void reverse(final Comparable[] array) {
final int arrayLength = array.length;
for(int l = 0, r = arrayLength - 1; l < r; l++, r--) {
final Comparable temp = array[l];

View File

@ -60,6 +60,19 @@ public class RecalDataManager {
private static boolean warnUserNullReadGroup = false;
private static boolean warnUserNullPlatform = false;
public enum SOLID_RECAL_MODE {
DO_NOTHING,
SET_Q_ZERO,
SET_Q_ZERO_BASE_N,
REMOVE_REF_BIAS
}
public enum SOLID_NOCALL_STRATEGY {
THROW_EXCEPTION,
LEAVE_READ_UNRECALIBRATED,
PURGE_READ
}
RecalDataManager() {
data = new NestedHashMap();
dataCollapsedReadGroup = null;
@ -263,9 +276,9 @@ public class RecalDataManager {
if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null ) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if( attr != null ) {
char[] colorSpace;
byte[] colorSpace;
if( attr instanceof String ) {
colorSpace = ((String)attr).toCharArray();
colorSpace = ((String)attr).getBytes();
} else {
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
@ -277,9 +290,9 @@ public class RecalDataManager {
}
final byte[] inconsistency = new byte[readBases.length];
int iii;
byte prevBase = (byte) colorSpace[0]; // The sentinel
byte prevBase = colorSpace[0]; // The sentinel
for( iii = 0; iii < readBases.length; iii++ ) {
final byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] );
final byte thisBase = getNextBaseFromColor( prevBase, colorSpace[iii + 1] );
inconsistency[iii] = (byte)( thisBase == readBases[iii] ? 0 : 1 );
prevBase = readBases[iii];
}
@ -298,18 +311,18 @@ public class RecalDataManager {
* This method doesn't add the inconsistent tag to the read like parseColorSpace does
* @param read The SAMRecord to parse
* @param originalQualScores The array of original quality scores to modify during the correction
* @param SOLID_RECAL_MODE Which mode of solid recalibration to apply
* @param solidRecalMode Which mode of solid recalibration to apply
* @param coinFlip A random number generator
* @param refBases The reference for this read
* @return A new array of quality scores that have been ref bias corrected
*/
public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, final String SOLID_RECAL_MODE, final Random coinFlip, final char[] refBases ) {
public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final Random coinFlip, final byte[] refBases ) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if( attr != null ) {
char[] colorSpace;
byte[] colorSpace;
if( attr instanceof String ) {
colorSpace = ((String)attr).toCharArray();
colorSpace = ((String)attr).getBytes();
} else {
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
@ -317,28 +330,28 @@ public class RecalDataManager {
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
byte[] readBases = read.getReadBases();
final byte[] colorImpliedBases = readBases.clone();
char[] refBasesDirRead = AlignmentUtils.alignmentToCharArray( read.getCigar(), read.getReadString().toCharArray(), refBases ); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray( read.getCigar(), read.getReadBases(), refBases ); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
if( read.getReadNegativeStrandFlag() ) {
readBases = BaseUtils.simpleReverseComplement( read.getReadBases() );
refBasesDirRead = BaseUtils.simpleReverseComplement( refBasesDirRead.clone() );
}
final int[] inconsistency = new int[readBases.length];
byte prevBase = (byte) colorSpace[0]; // The sentinel
byte prevBase = colorSpace[0]; // The sentinel
for( int iii = 0; iii < readBases.length; iii++ ) {
final byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] );
final byte thisBase = getNextBaseFromColor( prevBase, colorSpace[iii + 1] );
colorImpliedBases[iii] = thisBase;
inconsistency[iii] = ( thisBase == readBases[iii] ? 0 : 1 );
prevBase = readBases[iii];
}
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0
if( solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO ) { // Set inconsistent bases and the one before it to Q0
final boolean setBaseN = false;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) {
} else if( solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N ) {
final boolean setBaseN = true;
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
} else if( solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, coinFlip);
}
@ -354,15 +367,15 @@ public class RecalDataManager {
if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if( attr != null ) {
char[] colorSpace;
byte[] colorSpace;
if( attr instanceof String ) {
colorSpace = ((String)attr).substring(1).toCharArray(); // trim off the Sentinel
colorSpace = ((String)attr).substring(1).getBytes(); // trim off the Sentinel
} else {
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
}
for( char color : colorSpace ) {
if( color != '0' && color != '1' && color != '2' && color != '3' ) {
for( byte color : colorSpace ) {
if( color != (byte)'0' && color != (byte)'1' && color != (byte)'2' && color != (byte)'3' ) {
return true; // There is a bad color in this SOLiD read and the user wants to skip over it
}
}
@ -387,18 +400,18 @@ public class RecalDataManager {
* @return The byte array of original quality scores some of which might have been set to zero
*/
private static byte[] solidRecalSetToQZero( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores,
final char[] refBases, final boolean setBaseN ) {
final byte[] refBases, final boolean setBaseN ) {
final boolean negStrand = read.getReadNegativeStrandFlag();
for( int iii = 1; iii < originalQualScores.length; iii++ ) {
if( inconsistency[iii] == 1 ) {
if( (char)readBases[iii] == refBases[iii] ) {
if( readBases[iii] == refBases[iii] ) {
if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; }
else { originalQualScores[iii] = (byte)0; }
if( setBaseN ) { readBases[iii] = (byte)'N'; }
}
// Set the prev base to Q0 as well
if( (char)readBases[iii-1] == refBases[iii-1] ) {
if( readBases[iii-1] == refBases[iii-1] ) {
if( negStrand ) { originalQualScores[originalQualScores.length-iii] = (byte)0; }
else { originalQualScores[iii-1] = (byte)0; }
if( setBaseN ) { readBases[iii-1] = (byte)'N'; }
@ -423,7 +436,7 @@ public class RecalDataManager {
* @param coinFlip A random number generator
*/
private static void solidRecalRemoveRefBias( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases,
final char[] refBases, final Random coinFlip) {
final byte[] refBases, final Random coinFlip) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
if( attr != null ) {
@ -440,7 +453,7 @@ public class RecalDataManager {
if( inconsistency[iii] == 1 ) {
for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read
if( jjj == iii || inconsistency[jjj] == 0 ) { // Don't want to correct the previous base a second time if it was already corrected in the previous step
if( (char)readBases[jjj] == refBases[jjj] ) {
if( readBases[jjj] == refBases[jjj] ) {
if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin
final int rand = coinFlip.nextInt( 2 );
if( rand == 0 ) { // The color implied base won the coin flip
@ -488,18 +501,18 @@ public class RecalDataManager {
* @param color The color
* @return The next base in the sequence
*/
private static char getNextBaseFromColor( final char prevBase, final char color ) {
private static byte getNextBaseFromColor( final byte prevBase, final byte color ) {
switch(color) {
case '0': // same base
case '0':
return prevBase;
case '1': // transversion
return BaseUtils.transversion( prevBase );
case '2': // transition
return BaseUtils.transition( prevBase );
case '3': // simple complement
return BaseUtils.simpleComplement( prevBase );
case '1':
return performColorOne( prevBase );
case '2':
return performColorTwo( prevBase );
case '3':
return performColorThree( prevBase );
default:
throw new StingException( "Unrecognized color space in SOLID read, color = " + color +
throw new StingException( "Unrecognized color space in SOLID read, color = " + (char)color +
" Unfortunately this bam file can not be recalibrated without full color space information because of potential reference bias.");
}
}
@ -516,9 +529,9 @@ public class RecalDataManager {
final byte[] inconsistency = (byte[])attr;
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
if( read.getReadNegativeStrandFlag() ) { // Negative direction
return inconsistency[inconsistency.length - offset - 1] != 0;
return inconsistency[inconsistency.length - offset - 1] != (byte)0;
} else { // Forward direction
return inconsistency[offset] != 0;
return inconsistency[offset] != (byte)0;
}
// This block of code is for if you want to check both the offset and the next base for color space inconsistency
@ -572,4 +585,64 @@ public class RecalDataManager {
return covariateValues_offset_x_covar;
}
/**
* Perform a ceratin transversion (A <-> C or G <-> T) on the base.
*
* @param base the base [AaCcGgTt]
* @return the transversion of the base, or the input base if it's not one of the understood ones
*/
private static byte performColorOne(byte base) {
switch (base) {
case 'A':
case 'a': return 'C';
case 'C':
case 'c': return 'A';
case 'G':
case 'g': return 'T';
case 'T':
case 't': return 'G';
default: return base;
}
}
/**
* Perform a transition (A <-> G or C <-> T) on the base.
*
* @param base the base [AaCcGgTt]
* @return the transition of the base, or the input base if it's not one of the understood ones
*/
private static byte performColorTwo(byte base) {
switch (base) {
case 'A':
case 'a': return 'G';
case 'C':
case 'c': return 'T';
case 'G':
case 'g': return 'A';
case 'T':
case 't': return 'C';
default: return base;
}
}
/**
* Return the complement (A <-> T or C <-> G) of a base.
*
* @param base the base [AaCcGgTt]
* @return the complementary base, or the input base if it's not one of the understood ones
*/
private static byte performColorThree(byte base) {
switch (base) {
case 'A':
case 'a': return 'T';
case 'C':
case 'c': return 'G';
case 'G':
case 'g': return 'C';
case 'T':
case 't': return 'A';
default: return base;
}
}
}

View File

@ -86,7 +86,7 @@ public class RecalDatumOptimized {
}
}
public synchronized final void increment( final char curBase, final char refBase ) {
public synchronized final void incrementBaseCounts( final byte curBase, final byte refBase ) {
increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(refBase) ? 0 : 1 ); // increment takes num observations, then num mismatches
}

View File

@ -49,21 +49,14 @@ public class RecalibrationArgumentCollection {
public String FORCE_READ_GROUP = null;
@Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
@Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
public String SOLID_RECAL_MODE = "SET_Q_ZERO";
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="The window size used by MinimumNQSCovariate for its calculation", required=false)
public int WINDOW_SIZE = 5;
@Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
public int HOMOPOLYMER_NBACK = 7;
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
public boolean EXCEPTION_IF_NO_TILE = false;
@Argument(fullName = "ignore_nocall_colorspace", shortName="ignore_nocall_colorspace", doc="If provided, the recalibrator will skip over reads with no calls in the color space instead of halting with an exception", required=false)
public boolean IGNORE_NOCALL_COLORSPACE = false;
public final boolean checkSolidRecalMode() {
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") );
}
@Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
@Argument(fullName = "solid_nocall_strategy", shortName="solid_nocall_strategy", doc="Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required=false)
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
}

View File

@ -87,7 +87,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
/////////////////////////////
@ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file")
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file")
public String RECAL_FILE = "output.recal_data.csv";
/////////////////////////////
@ -145,9 +145,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
if( !RAC.checkSolidRecalMode() ) {
throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS");
}
// Get a list of all available covariates
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
@ -347,15 +344,20 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
final byte[] recalQuals = originalQuals.clone();
final String platform = read.getReadGroup().getPlatform();
if( platform.toUpperCase().contains("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
if( RAC.IGNORE_NOCALL_COLORSPACE ) {
if( platform.toUpperCase().contains("SOLID") && !(RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING) ) {
if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) ) {
final boolean badColor = RecalDataManager.checkNoCallColorSpace( read );
if( badColor ) {
numReadsWithMalformedColorSpace++;
return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
} else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
read.setReadFailsVendorQualityCheckFlag(true);
return read;
}
}
}
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases == null ? null : refBases.getBasesAsChars() );
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases == null ? null : refBases.getBases() );
}
//compute all covariate values for this read
@ -512,10 +514,18 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
*/
public void onTraversalDone(SAMFileWriter output) {
if( numReadsWithMalformedColorSpace != 0 ) {
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
"These reads remain in the output bam file but haven't been corrected for reference bias. Use at your own risk.");
"These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
} else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
"These reads were completely removed from the output bam file.");
}
}
}
}

View File

@ -49,10 +49,10 @@ public class BaseUtils {
public static final byte DELETION_INDEX = 4;
public static final byte NO_CALL_INDEX = 5; // (this is 'N')
public static int gIndex = BaseUtils.simpleBaseToBaseIndex('G');
public static int cIndex = BaseUtils.simpleBaseToBaseIndex('C');
public static int aIndex = BaseUtils.simpleBaseToBaseIndex('A');
public static int tIndex = BaseUtils.simpleBaseToBaseIndex('T');
public static int gIndex = BaseUtils.simpleBaseToBaseIndex((byte)'G');
public static int cIndex = BaseUtils.simpleBaseToBaseIndex((byte)'C');
public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte)'A');
public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte)'T');
/// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
@ -157,6 +157,32 @@ public class BaseUtils {
return bases;
}
/**
* Converts a simple base to a base index
*
* @param base [AaCcGgTt]
* @return 0, 1, 2, 3, or -1 if the base can't be understood
*/
static public int simpleBaseToBaseIndex(byte base) {
switch (base) {
case '*': // the wildcard character counts as an A
case 'A':
case 'a': return 0;
case 'C':
case 'c': return 1;
case 'G':
case 'g': return 2;
case 'T':
case 't': return 3;
default: return -1;
}
}
/**
* Converts a simple base to a base index
*
@ -194,10 +220,6 @@ public class BaseUtils {
}
}
static public int simpleBaseToBaseIndex(byte base) {
return simpleBaseToBaseIndex((char)base);
}
@Deprecated
static public boolean isRegularBase(char base) {
return simpleBaseToBaseIndex(base) != -1;
@ -303,48 +325,6 @@ public class BaseUtils {
return base2;
}
/**
* Perform a transition (A <-> G or C <-> T) on the base, or the specified base if it can't be done (i.e. an ambiguous base).
*
* @param base the base [AaCcGgTt]
* @return the transition of the base, or the input base if it's not one of the understood ones
*/
// todo -- are these right? Put into recalibator if really color space specific
static public char transition(char base) {
switch (base) {
case 'A':
case 'a': return 'G';
case 'C':
case 'c': return 'T';
case 'G':
case 'g': return 'A';
case 'T':
case 't': return 'C';
default: return base;
}
}
/**
* Perform a transversion (A <-> C or G <-> T) on the base, or the specified base if it can't be done (i.e. an ambiguous base).
*
* @param base the base [AaCcGgTt]
* @return the transversion of the base, or the input base if it's not one of the understood ones
*/
// todo -- are these right? Put into recalibator if really color space specific
static public char transversion(char base) {
switch (base) {
case 'A':
case 'a': return 'C';
case 'C':
case 'c': return 'A';
case 'G':
case 'g': return 'T';
case 'T':
case 't': return 'G';
default: return base;
}
}
/**
* Return the complement (A <-> T or C <-> G) of a base, or the specified base if it can't be complemented (i.e. an ambiguous base).
*
@ -380,7 +360,7 @@ public class BaseUtils {
byte[] rcbases = new byte[bases.length];
for (int i = 0; i < bases.length; i++) {
rcbases[i] = (byte)simpleComplement((char) bases[bases.length - 1 - i]);
rcbases[i] = simpleComplement(bases[bases.length - 1 - i]);
}
return rcbases;
@ -396,7 +376,7 @@ public class BaseUtils {
byte[] rcbases = new byte[bases.length];
for (int i = 0; i < bases.length; i++) {
rcbases[i] = (byte)simpleComplement((char) bases[i]);
rcbases[i] = simpleComplement(bases[i]);
}
return rcbases;
@ -442,6 +422,7 @@ public class BaseUtils {
* @param bases the String of bases
* @return the reverse complement of the String
*/
@Deprecated
static public String simpleReverseComplement(String bases) {
return new String(simpleReverseComplement(bases.getBytes()));
}
@ -453,6 +434,7 @@ public class BaseUtils {
* @param bases the String of bases
* @return the complement of the String
*/
@Deprecated
static public String simpleComplement(String bases) {
return new String(simpleComplement(bases.getBytes()));
}
@ -468,7 +450,7 @@ public class BaseUtils {
int[] baseCounts = new int[4];
for ( byte base : sequence ) {
int baseIndex = simpleBaseToBaseIndex((char) base);
int baseIndex = simpleBaseToBaseIndex(base);
if (baseIndex >= 0) {
baseCounts[baseIndex]++;
@ -522,6 +504,7 @@ public class BaseUtils {
*
* @return a random base (A, C, G, T)
*/
@Deprecated
static public byte getRandomBase() {
return getRandomBase('.');
}
@ -532,6 +515,7 @@ public class BaseUtils {
* @param excludeBase the base to exclude
* @return a random base, excluding the one specified (A, C, G, T)
*/
@Deprecated
static public byte getRandomBase(char excludeBase) {
return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
}

View File

@ -236,6 +236,7 @@ public class AlignmentUtils {
return n;
}
@Deprecated
public static char[] alignmentToCharArray( final Cigar cigar, final char[] read, final char[] ref ) {
final char[] alignment = new char[read.length];
@ -275,6 +276,45 @@ public class AlignmentUtils {
return alignment;
}
public static byte[] alignmentToByteArray( final Cigar cigar, final byte[] read, final byte[] ref ) {
final byte[] alignment = new byte[read.length];
int refPos = 0;
int alignPos = 0;
for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) {
final CigarElement ce = cigar.getCigarElement(iii);
final int elementLength = ce.getLength();
switch( ce.getOperator() ) {
case I:
case S:
for ( int jjj = 0 ; jjj < elementLength; jjj++ ) {
alignment[alignPos++] = '+';
}
break;
case D:
case N:
refPos++;
break;
case M:
for ( int jjj = 0 ; jjj < elementLength; jjj++ ) {
alignment[alignPos] = ref[refPos];
alignPos++;
refPos++;
}
break;
case H:
case P:
break;
default:
throw new StingException( "Unsupported cigar operator: " + ce.getOperator() );
}
}
return alignment;
}
/**
* Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format
* specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and

View File

@ -47,7 +47,7 @@ public class HLACallerIntegrationTest extends WalkerTest {
public void testCalculateBaseLikelihoods() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T CalculateBaseLikelihoods -I " + validationDataLocation + "NA12878.HISEQ.HLA.bam -R " + oneKGLocation + "reference/human_b36_both.fasta -L " + intervals + " -filter " + validationDataLocation + "HLA_HISEQ.filter -maxAllowedMismatches 6 -minRequiredMatches 0 -o %s", 1,
Arrays.asList("3272e3db32a3370b728457cd9a071339"));
Arrays.asList("98e64882f93bf7550457bee4182caab6"));
executeTest("test CalculateBaseLikelihoods", spec);
}