From 8e31c01680c9ed94fa4037e9236f5cab7345bc0d Mon Sep 17 00:00:00 2001 From: rpoplin Date: Mon, 19 Jul 2010 19:10:29 +0000 Subject: [PATCH] Solid processing in base quality recalibrator now has several options for how to handle no calls in the color space. --ignore_nocall_colorspace is removed and replace by --solid_nocall_strategy. Fixed some of the @Deprecated tags in BaseUtils. LocusWalkers now filter out FailsVendorQualityCheck reads. HLA caller integration test bam file had bad vendor reads so its integration test changed. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3831 348d0f76-0448-11de-a6fe-93d51630548a --- .../FailsVendorQualityCheckReadFilter.java | 43 ++++++ .../sting/gatk/walkers/LocusWalker.java | 9 +- .../recalibration/CovariateCounterWalker.java | 24 ++- .../walkers/recalibration/DinucCovariate.java | 5 +- .../recalibration/RecalDataManager.java | 141 +++++++++++++----- .../recalibration/RecalDatumOptimized.java | 2 +- .../RecalibrationArgumentCollection.java | 15 +- .../TableRecalibrationWalker.java | 30 ++-- .../broadinstitute/sting/utils/BaseUtils.java | 90 +++++------ .../sting/utils/sam/AlignmentUtils.java | 40 +++++ .../HLAcaller/HLACallerIntegrationTest.java | 2 +- 11 files changed, 271 insertions(+), 130 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java diff --git a/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java b/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java new file mode 100755 index 000000000..17a0c698f --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/filters/FailsVendorQualityCheckReadFilter.java @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.picard.filter.SamRecordFilter; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; + +/** + * Created by IntelliJ IDEA. + * User: rpoplin + * Date: Jul 19, 2010 + * + * Filter out FailsVendorQualityCheck reads. + */ + +public class FailsVendorQualityCheckReadFilter implements SamRecordFilter { + public boolean filterOut( final SAMRecord read ) { + return read.getReadFailsVendorQualityCheckFlag(); + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index 5d9eb5258..3e64e404b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -1,13 +1,10 @@ package org.broadinstitute.sting.gatk.walkers; +import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.traversals.TraversalStatistics; -import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; -import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; -import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; -import org.broadinstitute.sting.gatk.filters.InAdaptorFilter; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.iterators.LocusIteratorFilter; import net.sf.picard.filter.SamRecordFilter; @@ -49,8 +46,10 @@ public abstract class LocusWalker extends Walker x = super.getMandatoryReadFilters(); - x.addAll(Arrays.asList(filter3, filter2, filter1)); + x.addAll(Arrays.asList(filter4, filter3, filter2, filter1)); // } return x; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java index 54c1a8995..060404343 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CovariateCounterWalker.java @@ -137,9 +137,6 @@ public class CovariateCounterWalker extends LocusWalker im if( RAC.FORCE_READ_GROUP != null ) { RAC.DEFAULT_READ_GROUP = RAC.FORCE_READ_GROUP; } if( RAC.FORCE_PLATFORM != null ) { RAC.DEFAULT_PLATFORM = RAC.FORCE_PLATFORM; } DBSNP_VALIDATION_CHECK_FREQUENCY *= PROCESS_EVERY_NTH_LOCUS; - if( !RAC.checkSolidRecalMode() ) { - throw new StingException( "Unrecognized --solid_recal_mode argument. Implemented options: DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS"); - } // Get a list of all available covariates final List> covariateClasses = PackageUtils.getClassesImplementingInterface( Covariate.class ); @@ -287,7 +284,7 @@ public class CovariateCounterWalker extends LocusWalker im RecalDataManager.parseSAMRecord( gatkRead, RAC ); // Skip over reads with no calls in the color space if the user requested it - if( RAC.IGNORE_NOCALL_COLORSPACE && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) { + if( !(RAC.SOLID_NOCALL_STRATEGY == RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION) && RecalDataManager.checkNoCallColorSpace( gatkRead ) ) { gatkRead.setTemporaryAttribute( SKIP_RECORD_ATTRIBUTE, true); continue; } @@ -308,7 +305,8 @@ public class CovariateCounterWalker extends LocusWalker im if( BaseUtils.isRegularBase( bases[offset] ) ) { // SOLID bams have inserted the reference base into the read if the color space in inconsistent with the read base so skip it - if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || !RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) { + if( !gatkRead.getReadGroup().getPlatform().toUpperCase().contains("SOLID") || RAC.SOLID_RECAL_MODE == RecalDataManager.SOLID_RECAL_MODE.DO_NOTHING || + !RecalDataManager.isInconsistentColorSpace( gatkRead, offset ) ) { // This base finally passed all the checks for a good base, so add it to the big data hashmap updateDataFromRead( gatkRead, offset, refBase ); @@ -347,16 +345,16 @@ public class CovariateCounterWalker extends LocusWalker im * * @param counts The counts to be updated * @param context The AlignmentContext which holds the reads covered by this locus - * @param ref The reference base + * @param refBase The reference base */ - private static void updateMismatchCounts(final Pair counts, final AlignmentContext context, final byte ref) { + private static void updateMismatchCounts(final Pair counts, final AlignmentContext context, final byte refBase) { for( PileupElement p : context.getBasePileup() ) { - final byte readChar = p.getBase(); - final int readCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(readChar); - final int refCharBaseIndex = BaseUtils.simpleBaseToBaseIndex(ref); + final byte readBase = p.getBase(); + final int readBaseIndex = BaseUtils.simpleBaseToBaseIndex(readBase); + final int refBaseIndex = BaseUtils.simpleBaseToBaseIndex(refBase); - if( readCharBaseIndex != -1 && refCharBaseIndex != -1 ) { - if( readCharBaseIndex != refCharBaseIndex ) { + if( readBaseIndex != -1 && refBaseIndex != -1 ) { + if( readBaseIndex != refBaseIndex ) { counts.first++; } counts.second++; @@ -410,7 +408,7 @@ public class CovariateCounterWalker extends LocusWalker im final long curMismatches = datum.getNumMismatches(); // Add one to the number of observations and potentially one to the number of mismatches - datum.increment( (char)base, (char)refBase ); // Dangerous: If you don't cast to char then the bytes default to the (long, long) version of the increment method which is really bad + datum.incrementBaseCounts( base, refBase ); countedBases++; novel_counts.second++; novel_counts.first += datum.getNumMismatches() - curMismatches; // For sanity check to ensure novel mismatch rate vs dnsnp mismatch rate is reasonable diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java index ef55c2302..0de6897d0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/DinucCovariate.java @@ -136,7 +136,8 @@ public class DinucCovariate implements StandardCovariate { // Used to get the covariate's value from input csv file in TableRecalibrationWalker public final Comparable getValue( final String str ) { - final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( (byte)str.charAt(0), (byte)str.charAt(1) ) ); + byte[] bytes = str.getBytes(); + final Dinuc returnDinuc = dinucHashMap.get( Dinuc.hashBytes( bytes[0], bytes[1] ) ); if( returnDinuc.compareTo(NO_DINUC) == 0 ) { return null; } @@ -149,7 +150,7 @@ public class DinucCovariate implements StandardCovariate { * * @param array */ - private static final void reverse(final Comparable[] array) { + private static void reverse(final Comparable[] array) { final int arrayLength = array.length; for(int l = 0, r = arrayLength - 1; l < r; l++, r--) { final Comparable temp = array[l]; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java index 66e8cc4d1..be41e248e 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java @@ -60,6 +60,19 @@ public class RecalDataManager { private static boolean warnUserNullReadGroup = false; private static boolean warnUserNullPlatform = false; + public enum SOLID_RECAL_MODE { + DO_NOTHING, + SET_Q_ZERO, + SET_Q_ZERO_BASE_N, + REMOVE_REF_BIAS + } + + public enum SOLID_NOCALL_STRATEGY { + THROW_EXCEPTION, + LEAVE_READ_UNRECALIBRATED, + PURGE_READ + } + RecalDataManager() { data = new NestedHashMap(); dataCollapsedReadGroup = null; @@ -263,9 +276,9 @@ public class RecalDataManager { if( read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null ) { // Haven't calculated the inconsistency array yet for this read final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); if( attr != null ) { - char[] colorSpace; + byte[] colorSpace; if( attr instanceof String ) { - colorSpace = ((String)attr).toCharArray(); + colorSpace = ((String)attr).getBytes(); } else { throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); } @@ -277,9 +290,9 @@ public class RecalDataManager { } final byte[] inconsistency = new byte[readBases.length]; int iii; - byte prevBase = (byte) colorSpace[0]; // The sentinel + byte prevBase = colorSpace[0]; // The sentinel for( iii = 0; iii < readBases.length; iii++ ) { - final byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] ); + final byte thisBase = getNextBaseFromColor( prevBase, colorSpace[iii + 1] ); inconsistency[iii] = (byte)( thisBase == readBases[iii] ? 0 : 1 ); prevBase = readBases[iii]; } @@ -298,18 +311,18 @@ public class RecalDataManager { * This method doesn't add the inconsistent tag to the read like parseColorSpace does * @param read The SAMRecord to parse * @param originalQualScores The array of original quality scores to modify during the correction - * @param SOLID_RECAL_MODE Which mode of solid recalibration to apply + * @param solidRecalMode Which mode of solid recalibration to apply * @param coinFlip A random number generator * @param refBases The reference for this read * @return A new array of quality scores that have been ref bias corrected */ - public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, final String SOLID_RECAL_MODE, final Random coinFlip, final char[] refBases ) { + public static byte[] calcColorSpace( final SAMRecord read, byte[] originalQualScores, final SOLID_RECAL_MODE solidRecalMode, final Random coinFlip, final byte[] refBases ) { final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); if( attr != null ) { - char[] colorSpace; + byte[] colorSpace; if( attr instanceof String ) { - colorSpace = ((String)attr).toCharArray(); + colorSpace = ((String)attr).getBytes(); } else { throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); } @@ -317,28 +330,28 @@ public class RecalDataManager { // Loop over the read and calculate first the inferred bases from the color and then check if it is consistent with the read byte[] readBases = read.getReadBases(); final byte[] colorImpliedBases = readBases.clone(); - char[] refBasesDirRead = AlignmentUtils.alignmentToCharArray( read.getCigar(), read.getReadString().toCharArray(), refBases ); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases + byte[] refBasesDirRead = AlignmentUtils.alignmentToByteArray( read.getCigar(), read.getReadBases(), refBases ); //BUGBUG: This needs to change when read walkers are changed to give the aligned refBases if( read.getReadNegativeStrandFlag() ) { readBases = BaseUtils.simpleReverseComplement( read.getReadBases() ); refBasesDirRead = BaseUtils.simpleReverseComplement( refBasesDirRead.clone() ); } final int[] inconsistency = new int[readBases.length]; - byte prevBase = (byte) colorSpace[0]; // The sentinel + byte prevBase = colorSpace[0]; // The sentinel for( int iii = 0; iii < readBases.length; iii++ ) { - final byte thisBase = (byte)getNextBaseFromColor( (char)prevBase, colorSpace[iii + 1] ); + final byte thisBase = getNextBaseFromColor( prevBase, colorSpace[iii + 1] ); colorImpliedBases[iii] = thisBase; inconsistency[iii] = ( thisBase == readBases[iii] ? 0 : 1 ); prevBase = readBases[iii]; } // Now that we have the inconsistency array apply the desired correction to the inconsistent bases - if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") ) { // Set inconsistent bases and the one before it to Q0 + if( solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO ) { // Set inconsistent bases and the one before it to Q0 final boolean setBaseN = false; originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN); - } else if( SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") ) { + } else if( solidRecalMode == SOLID_RECAL_MODE.SET_Q_ZERO_BASE_N ) { final boolean setBaseN = true; originalQualScores = solidRecalSetToQZero(read, readBases, inconsistency, originalQualScores, refBasesDirRead, setBaseN); - } else if( SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases + } else if( solidRecalMode == SOLID_RECAL_MODE.REMOVE_REF_BIAS ) { // Use the color space quality to probabilistically remove ref bases at inconsistent color space bases solidRecalRemoveRefBias(read, readBases, inconsistency, colorImpliedBases, refBasesDirRead, coinFlip); } @@ -354,15 +367,15 @@ public class RecalDataManager { if( read.getReadGroup().getPlatform().toUpperCase().contains("SOLID") ) { final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG); if( attr != null ) { - char[] colorSpace; + byte[] colorSpace; if( attr instanceof String ) { - colorSpace = ((String)attr).substring(1).toCharArray(); // trim off the Sentinel + colorSpace = ((String)attr).substring(1).getBytes(); // trim off the Sentinel } else { throw new StingException(String.format("Value encoded by %s in %s isn't a string!", RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG, read.getReadName())); } - for( char color : colorSpace ) { - if( color != '0' && color != '1' && color != '2' && color != '3' ) { + for( byte color : colorSpace ) { + if( color != (byte)'0' && color != (byte)'1' && color != (byte)'2' && color != (byte)'3' ) { return true; // There is a bad color in this SOLiD read and the user wants to skip over it } } @@ -387,18 +400,18 @@ public class RecalDataManager { * @return The byte array of original quality scores some of which might have been set to zero */ private static byte[] solidRecalSetToQZero( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] originalQualScores, - final char[] refBases, final boolean setBaseN ) { + final byte[] refBases, final boolean setBaseN ) { final boolean negStrand = read.getReadNegativeStrandFlag(); for( int iii = 1; iii < originalQualScores.length; iii++ ) { if( inconsistency[iii] == 1 ) { - if( (char)readBases[iii] == refBases[iii] ) { + if( readBases[iii] == refBases[iii] ) { if( negStrand ) { originalQualScores[originalQualScores.length-(iii+1)] = (byte)0; } else { originalQualScores[iii] = (byte)0; } if( setBaseN ) { readBases[iii] = (byte)'N'; } } // Set the prev base to Q0 as well - if( (char)readBases[iii-1] == refBases[iii-1] ) { + if( readBases[iii-1] == refBases[iii-1] ) { if( negStrand ) { originalQualScores[originalQualScores.length-iii] = (byte)0; } else { originalQualScores[iii-1] = (byte)0; } if( setBaseN ) { readBases[iii-1] = (byte)'N'; } @@ -423,7 +436,7 @@ public class RecalDataManager { * @param coinFlip A random number generator */ private static void solidRecalRemoveRefBias( final SAMRecord read, byte[] readBases, final int[] inconsistency, final byte[] colorImpliedBases, - final char[] refBases, final Random coinFlip) { + final byte[] refBases, final Random coinFlip) { final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_QUAL_ATTRIBUTE_TAG); if( attr != null ) { @@ -440,7 +453,7 @@ public class RecalDataManager { if( inconsistency[iii] == 1 ) { for( int jjj = iii - 1; jjj <= iii; jjj++ ) { // Correct this base and the one before it along the direction of the read if( jjj == iii || inconsistency[jjj] == 0 ) { // Don't want to correct the previous base a second time if it was already corrected in the previous step - if( (char)readBases[jjj] == refBases[jjj] ) { + if( readBases[jjj] == refBases[jjj] ) { if( colorSpaceQuals[jjj] == colorSpaceQuals[jjj+1] ) { // Equal evidence for the color implied base and the reference base, so flip a coin final int rand = coinFlip.nextInt( 2 ); if( rand == 0 ) { // The color implied base won the coin flip @@ -488,18 +501,18 @@ public class RecalDataManager { * @param color The color * @return The next base in the sequence */ - private static char getNextBaseFromColor( final char prevBase, final char color ) { + private static byte getNextBaseFromColor( final byte prevBase, final byte color ) { switch(color) { - case '0': // same base + case '0': return prevBase; - case '1': // transversion - return BaseUtils.transversion( prevBase ); - case '2': // transition - return BaseUtils.transition( prevBase ); - case '3': // simple complement - return BaseUtils.simpleComplement( prevBase ); + case '1': + return performColorOne( prevBase ); + case '2': + return performColorTwo( prevBase ); + case '3': + return performColorThree( prevBase ); default: - throw new StingException( "Unrecognized color space in SOLID read, color = " + color + + throw new StingException( "Unrecognized color space in SOLID read, color = " + (char)color + " Unfortunately this bam file can not be recalibrated without full color space information because of potential reference bias."); } } @@ -516,9 +529,9 @@ public class RecalDataManager { final byte[] inconsistency = (byte[])attr; // NOTE: The inconsistency array is in the direction of the read, not aligned to the reference! if( read.getReadNegativeStrandFlag() ) { // Negative direction - return inconsistency[inconsistency.length - offset - 1] != 0; + return inconsistency[inconsistency.length - offset - 1] != (byte)0; } else { // Forward direction - return inconsistency[offset] != 0; + return inconsistency[offset] != (byte)0; } // This block of code is for if you want to check both the offset and the next base for color space inconsistency @@ -572,4 +585,64 @@ public class RecalDataManager { return covariateValues_offset_x_covar; } + + /** + * Perform a ceratin transversion (A <-> C or G <-> T) on the base. + * + * @param base the base [AaCcGgTt] + * @return the transversion of the base, or the input base if it's not one of the understood ones + */ + private static byte performColorOne(byte base) { + switch (base) { + case 'A': + case 'a': return 'C'; + case 'C': + case 'c': return 'A'; + case 'G': + case 'g': return 'T'; + case 'T': + case 't': return 'G'; + default: return base; + } + } + + /** + * Perform a transition (A <-> G or C <-> T) on the base. + * + * @param base the base [AaCcGgTt] + * @return the transition of the base, or the input base if it's not one of the understood ones + */ + private static byte performColorTwo(byte base) { + switch (base) { + case 'A': + case 'a': return 'G'; + case 'C': + case 'c': return 'T'; + case 'G': + case 'g': return 'A'; + case 'T': + case 't': return 'C'; + default: return base; + } + } + + /** + * Return the complement (A <-> T or C <-> G) of a base. + * + * @param base the base [AaCcGgTt] + * @return the complementary base, or the input base if it's not one of the understood ones + */ + private static byte performColorThree(byte base) { + switch (base) { + case 'A': + case 'a': return 'T'; + case 'C': + case 'c': return 'G'; + case 'G': + case 'g': return 'C'; + case 'T': + case 't': return 'A'; + default: return base; + } + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java index c4ad3869a..7ba441ccc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDatumOptimized.java @@ -86,7 +86,7 @@ public class RecalDatumOptimized { } } - public synchronized final void increment( final char curBase, final char refBase ) { + public synchronized final void incrementBaseCounts( final byte curBase, final byte refBase ) { increment( 1, BaseUtils.simpleBaseToBaseIndex(curBase) == BaseUtils.simpleBaseToBaseIndex(refBase) ? 0 : 1 ); // increment takes num observations, then num mismatches } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java index 62d8543a6..0e7f7d111 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationArgumentCollection.java @@ -49,21 +49,14 @@ public class RecalibrationArgumentCollection { public String FORCE_READ_GROUP = null; @Argument(fullName="force_platform", shortName="fP", required=false, doc="If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.") public String FORCE_PLATFORM = null; - @Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS") - public String SOLID_RECAL_MODE = "SET_Q_ZERO"; @Argument(fullName = "window_size_nqs", shortName="nqs", doc="The window size used by MinimumNQSCovariate for its calculation", required=false) public int WINDOW_SIZE = 5; @Argument(fullName = "homopolymer_nback", shortName="nback", doc="The number of previous bases to look at in HomopolymerCovariate", required=false) public int HOMOPOLYMER_NBACK = 7; @Argument(fullName = "exception_if_no_tile", shortName="throwTileException", doc="If provided, TileCovariate will throw an exception when no tile can be found. The default behavior is to use tile = -1", required=false) public boolean EXCEPTION_IF_NO_TILE = false; - @Argument(fullName = "ignore_nocall_colorspace", shortName="ignore_nocall_colorspace", doc="If provided, the recalibrator will skip over reads with no calls in the color space instead of halting with an exception", required=false) - public boolean IGNORE_NOCALL_COLORSPACE = false; - - - public final boolean checkSolidRecalMode() { - return ( SOLID_RECAL_MODE.equalsIgnoreCase("DO_NOTHING") || SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO") || - SOLID_RECAL_MODE.equalsIgnoreCase("SET_Q_ZERO_BASE_N") || SOLID_RECAL_MODE.equalsIgnoreCase("REMOVE_REF_BIAS") ); - } - + @Argument(fullName="solid_recal_mode", shortName="sMode", required = false, doc="How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS") + public RecalDataManager.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalDataManager.SOLID_RECAL_MODE.SET_Q_ZERO; + @Argument(fullName = "solid_nocall_strategy", shortName="solid_nocall_strategy", doc="Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required=false) + public RecalDataManager.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalDataManager.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java index 4a8c85415..b28085d5b 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/TableRecalibrationWalker.java @@ -87,7 +87,7 @@ public class TableRecalibrationWalker extends ReadWalker> classes = PackageUtils.getClassesImplementingInterface(Covariate.class); @@ -347,15 +344,20 @@ public class TableRecalibrationWalker extends ReadWalker G) or @@ -157,6 +157,32 @@ public class BaseUtils { return bases; } + /** + * Converts a simple base to a base index + * + * @param base [AaCcGgTt] + * @return 0, 1, 2, 3, or -1 if the base can't be understood + */ + static public int simpleBaseToBaseIndex(byte base) { + switch (base) { + case '*': // the wildcard character counts as an A + case 'A': + case 'a': return 0; + + case 'C': + case 'c': return 1; + + case 'G': + case 'g': return 2; + + case 'T': + case 't': return 3; + + default: return -1; + } + } + + /** * Converts a simple base to a base index * @@ -194,10 +220,6 @@ public class BaseUtils { } } - static public int simpleBaseToBaseIndex(byte base) { - return simpleBaseToBaseIndex((char)base); - } - @Deprecated static public boolean isRegularBase(char base) { return simpleBaseToBaseIndex(base) != -1; @@ -303,48 +325,6 @@ public class BaseUtils { return base2; } - /** - * Perform a transition (A <-> G or C <-> T) on the base, or the specified base if it can't be done (i.e. an ambiguous base). - * - * @param base the base [AaCcGgTt] - * @return the transition of the base, or the input base if it's not one of the understood ones - */ - // todo -- are these right? Put into recalibator if really color space specific - static public char transition(char base) { - switch (base) { - case 'A': - case 'a': return 'G'; - case 'C': - case 'c': return 'T'; - case 'G': - case 'g': return 'A'; - case 'T': - case 't': return 'C'; - default: return base; - } - } - - /** - * Perform a transversion (A <-> C or G <-> T) on the base, or the specified base if it can't be done (i.e. an ambiguous base). - * - * @param base the base [AaCcGgTt] - * @return the transversion of the base, or the input base if it's not one of the understood ones - */ - // todo -- are these right? Put into recalibator if really color space specific - static public char transversion(char base) { - switch (base) { - case 'A': - case 'a': return 'C'; - case 'C': - case 'c': return 'A'; - case 'G': - case 'g': return 'T'; - case 'T': - case 't': return 'G'; - default: return base; - } - } - /** * Return the complement (A <-> T or C <-> G) of a base, or the specified base if it can't be complemented (i.e. an ambiguous base). * @@ -380,7 +360,7 @@ public class BaseUtils { byte[] rcbases = new byte[bases.length]; for (int i = 0; i < bases.length; i++) { - rcbases[i] = (byte)simpleComplement((char) bases[bases.length - 1 - i]); + rcbases[i] = simpleComplement(bases[bases.length - 1 - i]); } return rcbases; @@ -396,7 +376,7 @@ public class BaseUtils { byte[] rcbases = new byte[bases.length]; for (int i = 0; i < bases.length; i++) { - rcbases[i] = (byte)simpleComplement((char) bases[i]); + rcbases[i] = simpleComplement(bases[i]); } return rcbases; @@ -442,6 +422,7 @@ public class BaseUtils { * @param bases the String of bases * @return the reverse complement of the String */ + @Deprecated static public String simpleReverseComplement(String bases) { return new String(simpleReverseComplement(bases.getBytes())); } @@ -453,6 +434,7 @@ public class BaseUtils { * @param bases the String of bases * @return the complement of the String */ + @Deprecated static public String simpleComplement(String bases) { return new String(simpleComplement(bases.getBytes())); } @@ -468,7 +450,7 @@ public class BaseUtils { int[] baseCounts = new int[4]; for ( byte base : sequence ) { - int baseIndex = simpleBaseToBaseIndex((char) base); + int baseIndex = simpleBaseToBaseIndex(base); if (baseIndex >= 0) { baseCounts[baseIndex]++; @@ -522,6 +504,7 @@ public class BaseUtils { * * @return a random base (A, C, G, T) */ + @Deprecated static public byte getRandomBase() { return getRandomBase('.'); } @@ -532,6 +515,7 @@ public class BaseUtils { * @param excludeBase the base to exclude * @return a random base, excluding the one specified (A, C, G, T) */ + @Deprecated static public byte getRandomBase(char excludeBase) { return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase))); } diff --git a/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 08f0bf5af..50c1a68f2 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -236,6 +236,7 @@ public class AlignmentUtils { return n; } + @Deprecated public static char[] alignmentToCharArray( final Cigar cigar, final char[] read, final char[] ref ) { final char[] alignment = new char[read.length]; @@ -275,6 +276,45 @@ public class AlignmentUtils { return alignment; } + public static byte[] alignmentToByteArray( final Cigar cigar, final byte[] read, final byte[] ref ) { + + final byte[] alignment = new byte[read.length]; + int refPos = 0; + int alignPos = 0; + + for ( int iii = 0 ; iii < cigar.numCigarElements() ; iii++ ) { + + final CigarElement ce = cigar.getCigarElement(iii); + final int elementLength = ce.getLength(); + + switch( ce.getOperator() ) { + case I: + case S: + for ( int jjj = 0 ; jjj < elementLength; jjj++ ) { + alignment[alignPos++] = '+'; + } + break; + case D: + case N: + refPos++; + break; + case M: + for ( int jjj = 0 ; jjj < elementLength; jjj++ ) { + alignment[alignPos] = ref[refPos]; + alignPos++; + refPos++; + } + break; + case H: + case P: + break; + default: + throw new StingException( "Unsupported cigar operator: " + ce.getOperator() ); + } + } + return alignment; + } + /** * Due to (unfortunate) multiple ways to indicate that read is unmapped allowed by SAM format * specification, one may need this convenience shortcut. Checks both 'read unmapped' flag and diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/HLACallerIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/HLACallerIntegrationTest.java index f71c1a841..e9f337fde 100755 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/HLACallerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/HLAcaller/HLACallerIntegrationTest.java @@ -47,7 +47,7 @@ public class HLACallerIntegrationTest extends WalkerTest { public void testCalculateBaseLikelihoods() { WalkerTestSpec spec = new WalkerTestSpec( "-T CalculateBaseLikelihoods -I " + validationDataLocation + "NA12878.HISEQ.HLA.bam -R " + oneKGLocation + "reference/human_b36_both.fasta -L " + intervals + " -filter " + validationDataLocation + "HLA_HISEQ.filter -maxAllowedMismatches 6 -minRequiredMatches 0 -o %s", 1, - Arrays.asList("3272e3db32a3370b728457cd9a071339")); + Arrays.asList("98e64882f93bf7550457bee4182caab6")); executeTest("test CalculateBaseLikelihoods", spec); }