Solid processing in base quality recalibrator now has several options for how to handle no calls in the color space. --ignore_nocall_colorspace is removed and replace by --solid_nocall_strategy. Fixed some of the @Deprecated tags in BaseUtils. LocusWalkers now filter out FailsVendorQualityCheck reads. HLA caller integration test bam file had bad vendor reads so its integration test changed.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3831 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
18b0114e25
commit
8e31c01680
|
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2010, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Jul 19, 2010
|
||||
*
|
||||
* Filter out FailsVendorQualityCheck reads.
|
||||
*/
|
||||
|
||||
public class FailsVendorQualityCheckReadFilter implements SamRecordFilter {
|
||||
public boolean filterOut( final SAMRecord read ) {
|
||||
return read.getReadFailsVendorQualityCheckFlag();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,13 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.InAdaptorFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorFilter;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
|
|
@ -49,8 +46,10 @@ public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, R
|
|||
SamRecordFilter filter1 = new UnmappedReadFilter();
|
||||
SamRecordFilter filter2 = new NotPrimaryAlignmentReadFilter();
|
||||
SamRecordFilter filter3 = new DuplicateReadFilter();
|
||||
SamRecordFilter filter4 = new FailsVendorQualityCheckReadFilter();
|
||||
//SamRecordFilter filter4 = new FailsVendorQualityReadFilter();
|
||||
List<SamRecordFilter> x = super.getMandatoryReadFilters();
|
||||
x.addAll(Arrays.asList(filter3, filter2, filter1));
|
||||
x.addAll(Arrays.asList(filter4, filter3, filter2, filter1));
|
||||
// }
|
||||
return x;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -137,9 +137,6 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
|
|||
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
||||
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
||||
DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS;
|
||||
if( !RAC.checkSolidRecalMode() ) {
|
||||
throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS");
|
||||
}
|
||||
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> covariateClasses = PackageUtils.getClassesImplementingInterface( Covariate.class );
|
||||
|
|
@ -287,7 +284,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
|
|||
RecalDataManager.parseSAMRecord( gatkRead, RAC );
|
||||
|
||||
// Skip over reads with no calls in the color space if the user requested it
|
||||
if( RAC.IGNORE_NOCALL_COLORSPACE && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) {
|
||||
if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) {
|
||||
gatkRead.setTemporaryAttribute( SKIP_RECORD_ATTRIBUTE, true);
|
||||
continue;
|
||||
}
|
||||
|
|
@ -308,7 +305,8 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
|
|||
if( BaseUtils.isRegularBase( bases[offset] ) ) {
|
||||
|
||||
// SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it
|
||||
if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) {
|
||||
if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING ||
|
||||
!RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) {
|
||||
|
||||
// This base finally passed all the checks for a good base, so add it to the big data hashmap
|
||||
updateDataFromRead( gatkRead, offset, refBase );
|
||||
|
|
@ -347,16 +345,16 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
|
|||
*
|
||||
* @param counts The counts to be updated
|
||||
* @param context The AlignmentContext which holds the reads covered by this locus
|
||||
* @param ref The reference base
|
||||
* @param refBase The reference base
|
||||
*/
|
||||
private static void updateMismatchCounts(final Pair<Long, Long> counts, final AlignmentContext context, final byte ref) {
|
||||
private static void updateMismatchCounts(final Pair<Long, Long> counts, final AlignmentContext context, final byte refBase) {
|
||||
for( PileupElement p : context.getBasePileup() ) {
|
||||
final byte readChar = p.getBase();
|
||||
final int readCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(readChar);
|
||||
final int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref);
|
||||
final byte readBase = p.getBase();
|
||||
final int readBaseIndex = BaseUtils.simpleBaseToBaseIndex(readBase);
|
||||
final int refBaseIndex = BaseUtils.simpleBaseToBaseIndex(refBase);
|
||||
|
||||
if( readCharBaseIndex != -1 && refCharBaseIndex != -1 ) {
|
||||
if( readCharBaseIndex != refCharBaseIndex ) {
|
||||
if( readBaseIndex != -1 && refBaseIndex != -1 ) {
|
||||
if( readBaseIndex != refBaseIndex ) {
|
||||
counts.first++;
|
||||
}
|
||||
counts.second++;
|
||||
|
|
@ -410,7 +408,7 @@ public class CovariateCounterWalker extends LocusWalker<Integer, PrintStream> im
|
|||
final long curMismatches = datum.getNumMismatches();
|
||||
|
||||
// Add one to the number of observations and potentially one to the number of mismatches
|
||||
datum.increment( (char)base, (char)refBase ); // Dangerous: If you don't cast to char then the bytes default to the (long, long) version of the increment method which is really bad
|
||||
datum.incrementBaseCounts( base, refBase );
|
||||
countedBases++;
|
||||
novel_counts.second++;
|
||||
novel_counts.first += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable
|
||||
|
|
|
|||
|
|
@ -136,7 +136,8 @@ public class DinucCovariate implements StandardCovariate {
|
|||
|
||||
// Used to get the covariate's value from input csv file in TableRecalibrationWalker
|
||||
public final Comparable getValue( final String str ) {
|
||||
final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) );
|
||||
byte[] bytes = str.getBytes();
|
||||
final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( bytes[0], bytes[1] ) );
|
||||
if( returnDinuc.compareTo(NO_DINUC) == 0 ) {
|
||||
return null;
|
||||
}
|
||||
|
|
@ -149,7 +150,7 @@ public class DinucCovariate implements StandardCovariate {
|
|||
*
|
||||
* @param array
|
||||
*/
|
||||
private static final void reverse(final Comparable[] array) {
|
||||
private static void reverse(final Comparable[] array) {
|
||||
final int arrayLength = array.length;
|
||||
for(int l = 0, r = arrayLength - 1; l < r; l++, r--) {
|
||||
final Comparable temp = array[l];
|
||||
|
|
|
|||
|
|
@ -60,6 +60,19 @@ public class RecalDataManager {
|
|||
private static boolean warnUserNullReadGroup = false;
|
||||
private static boolean warnUserNullPlatform = false;
|
||||
|
||||
public enum SOLID_RECAL_MODE {
|
||||
DO_NOTHING,
|
||||
SET_Q_ZERO,
|
||||
SET_Q_ZERO_BASE_N,
|
||||
REMOVE_REF_BIAS
|
||||
}
|
||||
|
||||
public enum SOLID_NOCALL_STRATEGY {
|
||||
THROW_EXCEPTION,
|
||||
LEAVE_READ_UNRECALIBRATED,
|
||||
PURGE_READ
|
||||
}
|
||||
|
||||
RecalDataManager() {
|
||||
data = new NestedHashMap();
|
||||
dataCollapsedReadGroup = null;
|
||||
|
|
@ -263,9 +276,9 @@ public class RecalDataManager {
|
|||
if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null ) { // Haven't calculated the inconsistency array yet for this read
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if( attr != null ) {
|
||||
char[] colorSpace;
|
||||
byte[] colorSpace;
|
||||
if( attr instanceof String ) {
|
||||
colorSpace = ((String)attr).toCharArray();
|
||||
colorSpace = ((String)attr).getBytes();
|
||||
} else {
|
||||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
|
|
@ -277,9 +290,9 @@ public class RecalDataManager {
|
|||
}
|
||||
final byte[] inconsistency = new byte[readBases.length];
|
||||
int iii;
|
||||
byte prevBase = (byte) colorSpace[0]; // The sentinel
|
||||
byte prevBase = colorSpace[0]; // The sentinel
|
||||
for( iii = 0; iii < readBases.length; iii++ ) {
|
||||
final byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] );
|
||||
final byte thisBase = getNextBaseFromColor( prevBase, colorSpace[iii + 1] );
|
||||
inconsistency[iii] = (byte)( thisBase == readBases[iii] ? 0 : 1 );
|
||||
prevBase = readBases[iii];
|
||||
}
|
||||
|
|
@ -298,18 +311,18 @@ public class RecalDataManager {
|
|||
* This method doesn't add the inconsistent tag to the read like parseColorSpace does
|
||||
* @param read The SAMRecord to parse
|
||||
* @param originalQualScores The array of original quality scores to modify during the correction
|
||||
* @param SOLID_RECAL_MODE Which mode of solid recalibration to apply
|
||||
* @param solidRecalMode Which mode of solid recalibration to apply
|
||||
* @param coinFlip A random number generator
|
||||
* @param refBases The reference for this read
|
||||
* @return A new array of quality scores that have been ref bias corrected
|
||||
*/
|
||||
public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, final String SOLID_RECAL_MODE, final Random coinFlip, final char[] refBases ) {
|
||||
public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final Random coinFlip, final byte[] refBases ) {
|
||||
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if( attr != null ) {
|
||||
char[] colorSpace;
|
||||
byte[] colorSpace;
|
||||
if( attr instanceof String ) {
|
||||
colorSpace = ((String)attr).toCharArray();
|
||||
colorSpace = ((String)attr).getBytes();
|
||||
} else {
|
||||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
|
|
@ -317,28 +330,28 @@ public class RecalDataManager {
|
|||
// Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read
|
||||
byte[] readBases = read.getReadBases();
|
||||
final byte[] colorImpliedBases = readBases.clone();
|
||||
char[] refBasesDirRead = AlignmentUtils.alignmentToCharArray( read.getCigar(), read.getReadString().toCharArray(), refBases ); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
|
||||
byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray( read.getCigar(), read.getReadBases(), refBases ); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases
|
||||
if( read.getReadNegativeStrandFlag() ) {
|
||||
readBases = BaseUtils.simpleReverseComplement( read.getReadBases() );
|
||||
refBasesDirRead = BaseUtils.simpleReverseComplement( refBasesDirRead.clone() );
|
||||
}
|
||||
final int[] inconsistency = new int[readBases.length];
|
||||
byte prevBase = (byte) colorSpace[0]; // The sentinel
|
||||
byte prevBase = colorSpace[0]; // The sentinel
|
||||
for( int iii = 0; iii < readBases.length; iii++ ) {
|
||||
final byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] );
|
||||
final byte thisBase = getNextBaseFromColor( prevBase, colorSpace[iii + 1] );
|
||||
colorImpliedBases[iii] = thisBase;
|
||||
inconsistency[iii] = ( thisBase == readBases[iii] ? 0 : 1 );
|
||||
prevBase = readBases[iii];
|
||||
}
|
||||
|
||||
// Now that we have the inconsistency array apply the desired correction to the inconsistent bases
|
||||
if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0
|
||||
if( solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO ) { // Set inconsistent bases and the one before it to Q0
|
||||
final boolean setBaseN = false;
|
||||
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
||||
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) {
|
||||
} else if( solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N ) {
|
||||
final boolean setBaseN = true;
|
||||
originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN);
|
||||
} else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
|
||||
} else if( solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases
|
||||
solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, coinFlip);
|
||||
}
|
||||
|
||||
|
|
@ -354,15 +367,15 @@ public class RecalDataManager {
|
|||
if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) {
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
|
||||
if( attr != null ) {
|
||||
char[] colorSpace;
|
||||
byte[] colorSpace;
|
||||
if( attr instanceof String ) {
|
||||
colorSpace = ((String)attr).substring(1).toCharArray(); // trim off the Sentinel
|
||||
colorSpace = ((String)attr).substring(1).getBytes(); // trim off the Sentinel
|
||||
} else {
|
||||
throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName()));
|
||||
}
|
||||
|
||||
for( char color : colorSpace ) {
|
||||
if( color != '0' && color != '1' && color != '2' && color != '3' ) {
|
||||
for( byte color : colorSpace ) {
|
||||
if( color != (byte)'0' && color != (byte)'1' && color != (byte)'2' && color != (byte)'3' ) {
|
||||
return true; // There is a bad color in this SOLiD read and the user wants to skip over it
|
||||
}
|
||||
}
|
||||
|
|
@ -387,18 +400,18 @@ public class RecalDataManager {
|
|||
* @return The byte array of original quality scores some of which might have been set to zero
|
||||
*/
|
||||
private static byte[] solidRecalSetToQZero( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores,
|
||||
final char[] refBases, final boolean setBaseN ) {
|
||||
final byte[] refBases, final boolean setBaseN ) {
|
||||
|
||||
final boolean negStrand = read.getReadNegativeStrandFlag();
|
||||
for( int iii = 1; iii < originalQualScores.length; iii++ ) {
|
||||
if( inconsistency[iii] == 1 ) {
|
||||
if( (char)readBases[iii] == refBases[iii] ) {
|
||||
if( readBases[iii] == refBases[iii] ) {
|
||||
if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; }
|
||||
else { originalQualScores[iii] = (byte)0; }
|
||||
if( setBaseN ) { readBases[iii] = (byte)'N'; }
|
||||
}
|
||||
// Set the prev base to Q0 as well
|
||||
if( (char)readBases[iii-1] == refBases[iii-1] ) {
|
||||
if( readBases[iii-1] == refBases[iii-1] ) {
|
||||
if( negStrand ) { originalQualScores[originalQualScores.length-iii] = (byte)0; }
|
||||
else { originalQualScores[iii-1] = (byte)0; }
|
||||
if( setBaseN ) { readBases[iii-1] = (byte)'N'; }
|
||||
|
|
@ -423,7 +436,7 @@ public class RecalDataManager {
|
|||
* @param coinFlip A random number generator
|
||||
*/
|
||||
private static void solidRecalRemoveRefBias( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases,
|
||||
final char[] refBases, final Random coinFlip) {
|
||||
final byte[] refBases, final Random coinFlip) {
|
||||
|
||||
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG);
|
||||
if( attr != null ) {
|
||||
|
|
@ -440,7 +453,7 @@ public class RecalDataManager {
|
|||
if( inconsistency[iii] == 1 ) {
|
||||
for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read
|
||||
if( jjj == iii || inconsistency[jjj] == 0 ) { // Don't want to correct the previous base a second time if it was already corrected in the previous step
|
||||
if( (char)readBases[jjj] == refBases[jjj] ) {
|
||||
if( readBases[jjj] == refBases[jjj] ) {
|
||||
if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin
|
||||
final int rand = coinFlip.nextInt( 2 );
|
||||
if( rand == 0 ) { // The color implied base won the coin flip
|
||||
|
|
@ -488,18 +501,18 @@ public class RecalDataManager {
|
|||
* @param color The color
|
||||
* @return The next base in the sequence
|
||||
*/
|
||||
private static char getNextBaseFromColor( final char prevBase, final char color ) {
|
||||
private static byte getNextBaseFromColor( final byte prevBase, final byte color ) {
|
||||
switch(color) {
|
||||
case '0': // same base
|
||||
case '0':
|
||||
return prevBase;
|
||||
case '1': // transversion
|
||||
return BaseUtils.transversion( prevBase );
|
||||
case '2': // transition
|
||||
return BaseUtils.transition( prevBase );
|
||||
case '3': // simple complement
|
||||
return BaseUtils.simpleComplement( prevBase );
|
||||
case '1':
|
||||
return performColorOne( prevBase );
|
||||
case '2':
|
||||
return performColorTwo( prevBase );
|
||||
case '3':
|
||||
return performColorThree( prevBase );
|
||||
default:
|
||||
throw new StingException( "Unrecognized color space in SOLID read, color = " + color +
|
||||
throw new StingException( "Unrecognized color space in SOLID read, color = " + (char)color +
|
||||
" Unfortunately this bam file can not be recalibrated without full color space information because of potential reference bias.");
|
||||
}
|
||||
}
|
||||
|
|
@ -516,9 +529,9 @@ public class RecalDataManager {
|
|||
final byte[] inconsistency = (byte[])attr;
|
||||
// NOTE: The inconsistency array is in the direction of the read, not aligned to the reference!
|
||||
if( read.getReadNegativeStrandFlag() ) { // Negative direction
|
||||
return inconsistency[inconsistency.length - offset - 1] != 0;
|
||||
return inconsistency[inconsistency.length - offset - 1] != (byte)0;
|
||||
} else { // Forward direction
|
||||
return inconsistency[offset] != 0;
|
||||
return inconsistency[offset] != (byte)0;
|
||||
}
|
||||
|
||||
// This block of code is for if you want to check both the offset and the next base for color space inconsistency
|
||||
|
|
@ -572,4 +585,64 @@ public class RecalDataManager {
|
|||
|
||||
return covariateValues_offset_x_covar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a ceratin transversion (A <-> C or G <-> T) on the base.
|
||||
*
|
||||
* @param base the base [AaCcGgTt]
|
||||
* @return the transversion of the base, or the input base if it's not one of the understood ones
|
||||
*/
|
||||
private static byte performColorOne(byte base) {
|
||||
switch (base) {
|
||||
case 'A':
|
||||
case 'a': return 'C';
|
||||
case 'C':
|
||||
case 'c': return 'A';
|
||||
case 'G':
|
||||
case 'g': return 'T';
|
||||
case 'T':
|
||||
case 't': return 'G';
|
||||
default: return base;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a transition (A <-> G or C <-> T) on the base.
|
||||
*
|
||||
* @param base the base [AaCcGgTt]
|
||||
* @return the transition of the base, or the input base if it's not one of the understood ones
|
||||
*/
|
||||
private static byte performColorTwo(byte base) {
|
||||
switch (base) {
|
||||
case 'A':
|
||||
case 'a': return 'G';
|
||||
case 'C':
|
||||
case 'c': return 'T';
|
||||
case 'G':
|
||||
case 'g': return 'A';
|
||||
case 'T':
|
||||
case 't': return 'C';
|
||||
default: return base;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the complement (A <-> T or C <-> G) of a base.
|
||||
*
|
||||
* @param base the base [AaCcGgTt]
|
||||
* @return the complementary base, or the input base if it's not one of the understood ones
|
||||
*/
|
||||
private static byte performColorThree(byte base) {
|
||||
switch (base) {
|
||||
case 'A':
|
||||
case 'a': return 'T';
|
||||
case 'C':
|
||||
case 'c': return 'G';
|
||||
case 'G':
|
||||
case 'g': return 'C';
|
||||
case 'T':
|
||||
case 't': return 'A';
|
||||
default: return base;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ public class RecalDatumOptimized {
|
|||
}
|
||||
}
|
||||
|
||||
public synchronized final void increment( final char curBase, final char refBase ) {
|
||||
public synchronized final void incrementBaseCounts( final byte curBase, final byte refBase ) {
|
||||
increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(refBase) ? 0 : 1 ); // increment takes num observations, then num mismatches
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -49,21 +49,14 @@ public class RecalibrationArgumentCollection {
|
|||
public String FORCE_READ_GROUP = null;
|
||||
@Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String FORCE_PLATFORM = null;
|
||||
@Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
|
||||
public String SOLID_RECAL_MODE = "SET_Q_ZERO";
|
||||
@Argument(fullName = "window_size_nqs", shortName="nqs", doc="The window size used by MinimumNQSCovariate for its calculation", required=false)
|
||||
public int WINDOW_SIZE = 5;
|
||||
@Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false)
|
||||
public int HOMOPOLYMER_NBACK = 7;
|
||||
@Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false)
|
||||
public boolean EXCEPTION_IF_NO_TILE = false;
|
||||
@Argument(fullName = "ignore_nocall_colorspace", shortName="ignore_nocall_colorspace", doc="If provided, the recalibrator will skip over reads with no calls in the color space instead of halting with an exception", required=false)
|
||||
public boolean IGNORE_NOCALL_COLORSPACE = false;
|
||||
|
||||
|
||||
public final boolean checkSolidRecalMode() {
|
||||
return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ||
|
||||
SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") );
|
||||
}
|
||||
|
||||
@Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
|
||||
public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO;
|
||||
@Argument(fullName = "solid_nocall_strategy", shortName="solid_nocall_strategy", doc="Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required=false)
|
||||
public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
/////////////////////////////
|
||||
@ArgumentCollection private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
|
||||
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the outputted covariates table recalibration file")
|
||||
@Argument(fullName="recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file")
|
||||
public String RECAL_FILE = "output.recal_data.csv";
|
||||
|
||||
/////////////////////////////
|
||||
|
|
@ -145,9 +145,6 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; }
|
||||
if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; }
|
||||
if( !RAC.checkSolidRecalMode() ) {
|
||||
throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS");
|
||||
}
|
||||
|
||||
// Get a list of all available covariates
|
||||
final List<Class<? extends Covariate>> classes = PackageUtils.getClassesImplementingInterface(Covariate.class);
|
||||
|
|
@ -347,15 +344,20 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
final byte[] recalQuals = originalQuals.clone();
|
||||
|
||||
final String platform = read.getReadGroup().getPlatform();
|
||||
if( platform.toUpperCase().contains("SOLID") && !RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") ) {
|
||||
if( RAC.IGNORE_NOCALL_COLORSPACE ) {
|
||||
if( platform.toUpperCase().contains("SOLID") && !(RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING) ) {
|
||||
if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) ) {
|
||||
final boolean badColor = RecalDataManager.checkNoCallColorSpace( read );
|
||||
if( badColor ) {
|
||||
numReadsWithMalformedColorSpace++;
|
||||
return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
|
||||
if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
|
||||
return read; // can't recalibrate a SOLiD read with no calls in the color space, and the user wants to skip over them
|
||||
} else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
|
||||
read.setReadFailsVendorQualityCheckFlag(true);
|
||||
return read;
|
||||
}
|
||||
}
|
||||
}
|
||||
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases == null ? null : refBases.getBasesAsChars() );
|
||||
originalQuals = RecalDataManager.calcColorSpace( read, originalQuals, RAC.SOLID_RECAL_MODE, coinFlip, refBases == null ? null : refBases.getBases() );
|
||||
}
|
||||
|
||||
//compute all covariate values for this read
|
||||
|
|
@ -512,10 +514,18 @@ public class TableRecalibrationWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
*/
|
||||
public void onTraversalDone(SAMFileWriter output) {
|
||||
if( numReadsWithMalformedColorSpace != 0 ) {
|
||||
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
|
||||
if( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.LEAVE_READ_UNRECALIBRATED ) {
|
||||
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
|
||||
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
|
||||
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
|
||||
"These reads remain in the output bam file but haven't been corrected for reference bias. Use at your own risk.");
|
||||
"These reads remain in the output bam file but haven't been corrected for reference bias. !!! USE AT YOUR OWN RISK !!!");
|
||||
} else if ( RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.PURGE_READ ) {
|
||||
Utils.warnUser("Discovered " + numReadsWithMalformedColorSpace + " SOLiD reads with no calls in the color space. Unfortunately these reads cannot be recalibrated with this recalibration algorithm " +
|
||||
"because we use reference mismatch rate as the only indication of a base's true quality. These reads have had reference bases inserted as a way of correcting " +
|
||||
"for color space misalignments and there is now no way of knowing how often it mismatches the reference and therefore no way to recalibrate the quality score. " +
|
||||
"These reads were completely removed from the output bam file.");
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,10 +49,10 @@ public class BaseUtils {
|
|||
public static final byte DELETION_INDEX = 4;
|
||||
public static final byte NO_CALL_INDEX = 5; // (this is 'N')
|
||||
|
||||
public static int gIndex = BaseUtils.simpleBaseToBaseIndex('G');
|
||||
public static int cIndex = BaseUtils.simpleBaseToBaseIndex('C');
|
||||
public static int aIndex = BaseUtils.simpleBaseToBaseIndex('A');
|
||||
public static int tIndex = BaseUtils.simpleBaseToBaseIndex('T');
|
||||
public static int gIndex = BaseUtils.simpleBaseToBaseIndex((byte)'G');
|
||||
public static int cIndex = BaseUtils.simpleBaseToBaseIndex((byte)'C');
|
||||
public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte)'A');
|
||||
public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte)'T');
|
||||
|
||||
|
||||
/// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
|
||||
|
|
@ -157,6 +157,32 @@ public class BaseUtils {
|
|||
return bases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a simple base to a base index
|
||||
*
|
||||
* @param base [AaCcGgTt]
|
||||
* @return 0, 1, 2, 3, or -1 if the base can't be understood
|
||||
*/
|
||||
static public int simpleBaseToBaseIndex(byte base) {
|
||||
switch (base) {
|
||||
case '*': // the wildcard character counts as an A
|
||||
case 'A':
|
||||
case 'a': return 0;
|
||||
|
||||
case 'C':
|
||||
case 'c': return 1;
|
||||
|
||||
case 'G':
|
||||
case 'g': return 2;
|
||||
|
||||
case 'T':
|
||||
case 't': return 3;
|
||||
|
||||
default: return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Converts a simple base to a base index
|
||||
*
|
||||
|
|
@ -194,10 +220,6 @@ public class BaseUtils {
|
|||
}
|
||||
}
|
||||
|
||||
static public int simpleBaseToBaseIndex(byte base) {
|
||||
return simpleBaseToBaseIndex((char)base);
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
static public boolean isRegularBase(char base) {
|
||||
return simpleBaseToBaseIndex(base) != -1;
|
||||
|
|
@ -303,48 +325,6 @@ public class BaseUtils {
|
|||
return base2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a transition (A <-> G or C <-> T) on the base, or the specified base if it can't be done (i.e. an ambiguous base).
|
||||
*
|
||||
* @param base the base [AaCcGgTt]
|
||||
* @return the transition of the base, or the input base if it's not one of the understood ones
|
||||
*/
|
||||
// todo -- are these right? Put into recalibator if really color space specific
|
||||
static public char transition(char base) {
|
||||
switch (base) {
|
||||
case 'A':
|
||||
case 'a': return 'G';
|
||||
case 'C':
|
||||
case 'c': return 'T';
|
||||
case 'G':
|
||||
case 'g': return 'A';
|
||||
case 'T':
|
||||
case 't': return 'C';
|
||||
default: return base;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Perform a transversion (A <-> C or G <-> T) on the base, or the specified base if it can't be done (i.e. an ambiguous base).
|
||||
*
|
||||
* @param base the base [AaCcGgTt]
|
||||
* @return the transversion of the base, or the input base if it's not one of the understood ones
|
||||
*/
|
||||
// todo -- are these right? Put into recalibator if really color space specific
|
||||
static public char transversion(char base) {
|
||||
switch (base) {
|
||||
case 'A':
|
||||
case 'a': return 'C';
|
||||
case 'C':
|
||||
case 'c': return 'A';
|
||||
case 'G':
|
||||
case 'g': return 'T';
|
||||
case 'T':
|
||||
case 't': return 'G';
|
||||
default: return base;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the complement (A <-> T or C <-> G) of a base, or the specified base if it can't be complemented (i.e. an ambiguous base).
|
||||
*
|
||||
|
|
@ -380,7 +360,7 @@ public class BaseUtils {
|
|||
byte[] rcbases = new byte[bases.length];
|
||||
|
||||
for (int i = 0; i < bases.length; i++) {
|
||||
rcbases[i] = (byte)simpleComplement((char) bases[bases.length - 1 - i]);
|
||||
rcbases[i] = simpleComplement(bases[bases.length - 1 - i]);
|
||||
}
|
||||
|
||||
return rcbases;
|
||||
|
|
@ -396,7 +376,7 @@ public class BaseUtils {
|
|||
byte[] rcbases = new byte[bases.length];
|
||||
|
||||
for (int i = 0; i < bases.length; i++) {
|
||||
rcbases[i] = (byte)simpleComplement((char) bases[i]);
|
||||
rcbases[i] = simpleComplement(bases[i]);
|
||||
}
|
||||
|
||||
return rcbases;
|
||||
|
|
@ -442,6 +422,7 @@ public class BaseUtils {
|
|||
* @param bases the String of bases
|
||||
* @return the reverse complement of the String
|
||||
*/
|
||||
@Deprecated
|
||||
static public String simpleReverseComplement(String bases) {
|
||||
return new String(simpleReverseComplement(bases.getBytes()));
|
||||
}
|
||||
|
|
@ -453,6 +434,7 @@ public class BaseUtils {
|
|||
* @param bases the String of bases
|
||||
* @return the complement of the String
|
||||
*/
|
||||
@Deprecated
|
||||
static public String simpleComplement(String bases) {
|
||||
return new String(simpleComplement(bases.getBytes()));
|
||||
}
|
||||
|
|
@ -468,7 +450,7 @@ public class BaseUtils {
|
|||
int[] baseCounts = new int[4];
|
||||
|
||||
for ( byte base : sequence ) {
|
||||
int baseIndex = simpleBaseToBaseIndex((char) base);
|
||||
int baseIndex = simpleBaseToBaseIndex(base);
|
||||
|
||||
if (baseIndex >= 0) {
|
||||
baseCounts[baseIndex]++;
|
||||
|
|
@ -522,6 +504,7 @@ public class BaseUtils {
|
|||
*
|
||||
* @return a random base (A, C, G, T)
|
||||
*/
|
||||
@Deprecated
|
||||
static public byte getRandomBase() {
|
||||
return getRandomBase('.');
|
||||
}
|
||||
|
|
@ -532,6 +515,7 @@ public class BaseUtils {
|
|||
* @param excludeBase the base to exclude
|
||||
* @return a random base, excluding the one specified (A, C, G, T)
|
||||
*/
|
||||
@Deprecated
|
||||
static public byte getRandomBase(char excludeBase) {
|
||||
return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -236,6 +236,7 @@ public class AlignmentUtils {
|
|||
return n;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public static char[] alignmentToCharArray( final Cigar cigar, final char[] read, final char[] ref ) {
|
||||
|
||||
final char[] alignment = new char[read.length];
|
||||
|
|
@ -275,6 +276,45 @@ public class AlignmentUtils {
|
|||
return alignment;
|
||||
}
|
||||
|
||||
public static byte[] alignmentToByteArray( final Cigar cigar, final byte[] read, final byte[] ref ) {
|
||||
|
||||
final byte[] alignment = new byte[read.length];
|
||||
int refPos = 0;
|
||||
int alignPos = 0;
|
||||
|
||||
for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) {
|
||||
|
||||
final CigarElement ce = cigar.getCigarElement(iii);
|
||||
final int elementLength = ce.getLength();
|
||||
|
||||
switch( ce.getOperator() ) {
|
||||
case I:
|
||||
case S:
|
||||
for ( int jjj = 0 ; jjj < elementLength; jjj++ ) {
|
||||
alignment[alignPos++] = '+';
|
||||
}
|
||||
break;
|
||||
case D:
|
||||
case N:
|
||||
refPos++;
|
||||
break;
|
||||
case M:
|
||||
for ( int jjj = 0 ; jjj < elementLength; jjj++ ) {
|
||||
alignment[alignPos] = ref[refPos];
|
||||
alignPos++;
|
||||
refPos++;
|
||||
}
|
||||
break;
|
||||
case H:
|
||||
case P:
|
||||
break;
|
||||
default:
|
||||
throw new StingException( "Unsupported cigar operator: " + ce.getOperator() );
|
||||
}
|
||||
}
|
||||
return alignment;
|
||||
}
|
||||
|
||||
/**
|
||||
* Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format
|
||||
* specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class HLACallerIntegrationTest extends WalkerTest {
|
|||
public void testCalculateBaseLikelihoods() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T CalculateBaseLikelihoods -I " + validationDataLocation + "NA12878.HISEQ.HLA.bam -R " + oneKGLocation + "reference/human_b36_both.fasta -L " + intervals + " -filter " + validationDataLocation + "HLA_HISEQ.filter -maxAllowedMismatches 6 -minRequiredMatches 0 -o %s", 1,
|
||||
Arrays.asList("3272e3db32a3370b728457cd9a071339"));
|
||||
Arrays.asList("98e64882f93bf7550457bee4182caab6"));
|
||||
executeTest("test CalculateBaseLikelihoods", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue