Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

2012-06-11 10:38:50 -04:00 · 2012-06-11 10:38:50 -04:00 · e4d371dc80
parent 683d4b508e 4aad7e23ef
commit e4d371dc80
10 changed files with 220 additions and 99 deletions
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRKeyManager.java
@ -25,8 +25,11 @@ import java.util.*;
 * @since 3/6/12
 */
 public class BQSRKeyManager {
-    private final List<RequiredCovariateInfo> requiredCovariates;
-    private final List<OptionalCovariateInfo> optionalCovariates;
+
+    private final List<Covariate> requiredCovariates;
+    private final List<Covariate> optionalCovariates;
+    private final List<RequiredCovariateInfo> requiredCovariatesInfo;
+    private final List<OptionalCovariateInfo> optionalCovariatesInfo;
    private final Map<String, Short> covariateNameToIDMap;

    private int nRequiredBits;                                                                                          // Number of bits used to represent the required covariates
@ -44,15 +47,17 @@ public class BQSRKeyManager {
     * @param optionalCovariates the ordered list of optional covariates
     */
    public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
-        this.requiredCovariates = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size());                      // initialize the required covariates list
-        this.optionalCovariates = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size());                      // initialize the optional covariates list (size may be 0, it's okay)
-        this.covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2);                            // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
+        this.requiredCovariates = new ArrayList<Covariate>(requiredCovariates);
+        this.optionalCovariates = new ArrayList<Covariate>(optionalCovariates);
+        requiredCovariatesInfo = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size());                       // initialize the required covariates list
+        optionalCovariatesInfo = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size());                       // initialize the optional covariates list (size may be 0, it's okay)
+        covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2);                                 // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
        
        nRequiredBits = 0;
        for (Covariate required : requiredCovariates) {                                                                 // create a list of required covariates with the extra information for key management
            int nBits = required.numberOfBits();                                                                        // number of bits used by this covariate
            BitSet mask = genericMask(nRequiredBits, nBits);                                                            // create a mask for this covariate
-            this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, mask, required));                      // Create an object for this required covariate
+            requiredCovariatesInfo.add(new RequiredCovariateInfo(nRequiredBits, mask, required));                       // Create an object for this required covariate
            nRequiredBits += nBits;
        }

@ -61,10 +66,10 @@ public class BQSRKeyManager {
        for (Covariate optional : optionalCovariates) {
            int nBits = optional.numberOfBits();                                                                        // number of bits used by this covariate
            nOptionalBits = Math.max(nOptionalBits, nBits);                                                             // optional covariates are represented by the number of bits needed by biggest covariate
-            BitSet optionalID = BitSetUtils.bitSetFrom(id);                                                             // calculate the optional covariate ID for this covariate
-            this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional));                               // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
+            BitSet optionalID = bitSetFromId(id);                                                                       // calculate the optional covariate ID for this covariate
+            optionalCovariatesInfo.add(new OptionalCovariateInfo(optionalID, optional));                                // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
            String covariateName = optional.getClass().getSimpleName().split("Covariate")[0];                           // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
-            this.covariateNameToIDMap.put(covariateName, id);
+            covariateNameToIDMap.put(covariateName, id);
            id++;
        }

@ -93,17 +98,17 @@ public class BQSRKeyManager {
     * @return one key in bitset representation per covariate
     */
    public List<BitSet> bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) {
-        List<BitSet> allBitSets = new LinkedList<BitSet>();                                                             // Generate one key per optional covariate
+        List<BitSet> allBitSets = new ArrayList<BitSet>();                                                              // Generate one key per optional covariate

-        BitSet eventBitSet = BitSetUtils.bitSetFrom(eventType.index);                                                   // create a bitset with the event type
+        BitSet eventBitSet = bitSetFromEvent(eventType);                                                                // create a bitset with the event type
        int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits;                                        // Location in the bit set to add the event type bits

        int covariateIndex = 0;
        BitSet requiredKey = new BitSet(nRequiredBits);                                                                 // This will be a bitset holding all the required keys, to replicate later on
-        for (RequiredCovariateInfo infoRequired : requiredCovariates)
+        for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
            addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore);                  // Add all the required covariates to the key set

-        for (OptionalCovariateInfo infoOptional : optionalCovariates) {
+        for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) {
            BitSet covariateKey = allKeys[covariateIndex++];                                                            // get the bitset from all keys
            if (covariateKey == null)
                continue;                                                                                               // do not add nulls to the final set of keys.
@ -116,7 +121,7 @@ public class BQSRKeyManager {
            allBitSets.add(optionalKey);                                                                                // add this key to the list of keys
        }

-        if (optionalCovariates.size() == 0) {                                                                           // special case when we have no optional covariates, add the event type to the required key (our only key)
+        if (optionalCovariatesInfo.size() == 0) {                                                                       // special case when we have no optional covariates, add the event type to the required key (our only key)
            addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex);                                      // Add the event type
            allBitSets.add(requiredKey);                                                                                // add this key to the list of keys
        }
@ -140,16 +145,16 @@ public class BQSRKeyManager {
        BitSet bitSetKey = new BitSet(totalNumberOfBits);
        
        int requiredCovariate = 0;
-        for (RequiredCovariateInfo infoRequired : requiredCovariates) {             
+        for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) {
            BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]);                    // create a bitset from the object key provided using the required covariate's interface
            addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore);                              // add it to the bitset key
        }
        
-        if (optionalCovariates.size() > 0) {
-            int optionalCovariate = requiredCovariates.size();                                                          // the optional covariate index in the key array
+        if (optionalCovariatesInfo.size() > 0) {
+            int optionalCovariate = requiredCovariatesInfo.size();                                                      // the optional covariate index in the key array
            int covariateIDIndex = optionalCovariate + 1;                                                               // the optional covariate ID index is right after the optional covariate's
            int covariateID = parseCovariateID(key[covariateIDIndex]);                                                  // when reading the GATK Report the ID may come in a String instead of an index
-            OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID);                                   // so we can get the optional covariate information
+            OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID);                               // so we can get the optional covariate information
            
            BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]);                      // convert the optional covariate key into a bitset using the covariate's interface
            addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits);                                        // add the optional covariate right after the required covariates
@ -185,16 +190,16 @@ public class BQSRKeyManager {
     */
    public List<Object> keySetFrom(BitSet key) {
        List<Object> objectKeys = new ArrayList<Object>();
-        for (RequiredCovariateInfo info : requiredCovariates) {
+        for (RequiredCovariateInfo info : requiredCovariatesInfo) {
            BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore);                             // get the covariate's bitset
            objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet));                                              // convert the bitset to object using covariate's interface
        }

-        if (optionalCovariates.size() > 0) {
+        if (optionalCovariatesInfo.size() > 0) {
            BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits);                         // mask out the covariate bit set
            BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits);            // mask out the covariate order (to identify which covariate this is)
            short id = BitSetUtils.shortFrom(idbs);                                                                     // covert the id bitset into a short
-            Covariate covariate = optionalCovariates.get(id).covariate;                                                 // get the corresponding optional covariate object
+            Covariate covariate = optionalCovariatesInfo.get(id).covariate;                                                 // get the corresponding optional covariate object
            objectKeys.add(covariate.keyFromBitSet(covBitSet));                                                         // add the optional covariate to the key set
            objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]);                                 // add the covariate name using the id
        }
@ -204,17 +209,11 @@ public class BQSRKeyManager {
    }

    public List<Covariate> getRequiredCovariates() {
-        ArrayList<Covariate> list = new ArrayList<Covariate>(requiredCovariates.size());
-        for (RequiredCovariateInfo info : requiredCovariates) 
-            list.add(info.covariate);
-        return list;
+        return requiredCovariates;
    }

    public List<Covariate> getOptionalCovariates() {
-        ArrayList<Covariate> list = new ArrayList<Covariate>(optionalCovariates.size());
-        for (OptionalCovariateInfo info : optionalCovariates)
-            list.add(info.covariate);
-        return list;
+        return optionalCovariates;
    }

    /**
@ -258,9 +257,20 @@ public class BQSRKeyManager {
            eventKey.set(i - firstBitIndex);
        return EventType.eventFrom(BitSetUtils.shortFrom(eventKey));
    }
-    
-    private BitSet bitSetFromEvent(EventType eventType) {
-        return BitSetUtils.bitSetFrom(eventType.index);
+
+    // cache the BitSet representing an event since it's otherwise created a massive amount of times
+    private static final Map<EventType, BitSet> eventTypeCache = new HashMap<EventType, BitSet>(EventType.values().length);
+    static {
+        for (final EventType eventType : EventType.values())
+            eventTypeCache.put(eventType, BitSetUtils.bitSetFrom(eventType.index));
+    }
+
+    private BitSet bitSetFromEvent(final EventType eventType) {
+        return eventTypeCache.get(eventType);
+    }
+
+    private BitSet bitSetFromId(final short id) {
+        return BitSetUtils.bitSetFrom(id);
    }

    private int bitsInEventType() {
@ -287,24 +297,24 @@ public class BQSRKeyManager {
        if (this == other)
            return true;

-        if (requiredCovariates.size() != other.requiredCovariates.size() || optionalCovariates.size() != other.optionalCovariates.size())
+        if (requiredCovariatesInfo.size() != other.requiredCovariatesInfo.size() ||
+                optionalCovariatesInfo.size() != other.optionalCovariatesInfo.size())
            return false;

-        Iterator<RequiredCovariateInfo> otherRequiredIterator = other.requiredCovariates.iterator();
-        for (RequiredCovariateInfo thisInfo: requiredCovariates) {
-            RequiredCovariateInfo otherInfo = otherRequiredIterator.next();
-
-            String thisName = thisInfo.covariate.getClass().getSimpleName();
-            String otherName = otherInfo.covariate.getClass().getSimpleName();
+        for (int i = 0; i < requiredCovariates.size(); i++) {
+            Covariate myRequiredCovariate = requiredCovariates.get(i);
+            Covariate otherRequiredCovariate = other.requiredCovariates.get(i);
+            String thisName = myRequiredCovariate.getClass().getSimpleName();
+            String otherName = otherRequiredCovariate.getClass().getSimpleName();
            if (!thisName.equals(otherName))
                return false;
        }

-        Iterator<OptionalCovariateInfo> otherOptionalIterator = other.optionalCovariates.iterator();
-        for (OptionalCovariateInfo thisInfo : optionalCovariates) {
-            OptionalCovariateInfo otherInfo = otherOptionalIterator.next();
-            String thisName = thisInfo.covariate.getClass().getSimpleName();
-            String otherName = otherInfo.covariate.getClass().getSimpleName();
+        for (int i = 0; i < optionalCovariates.size(); i++) {
+            Covariate myOptionalCovariate = optionalCovariates.get(i);
+            Covariate otherOptionalCovariate = other.optionalCovariates.get(i);
+            String thisName = myOptionalCovariate.getClass().getSimpleName();
+            String otherName = otherOptionalCovariate.getClass().getSimpleName();
            if (!thisName.equals(otherName))
                return false;
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
@ -126,8 +126,8 @@ public class ContextCovariate implements StandardCovariate {
    private BitSet contextWith(byte[] bases, int offset, int contextSize) {
        BitSet result = null;
        if (offset - contextSize + 1 >= 0) {
-            String context = new String(Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1));
-            if (!context.contains("N"))
+            final byte[] context = Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1);
+            if (!BaseUtils.containsBase(context, BaseUtils.N))
                result = BitSetUtils.bitSetFrom(context);
        }
        return result;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@ -139,7 +139,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
    // Outputs
    /////////////////////////////
    @Output(fullName="recal_file", shortName="recalFile", doc="The output recal file used by ApplyRecalibration", required=true)
-    protected File recalFile = null;
    protected VariantContextWriter recalWriter = null;

    @Output(fullName="tranches_file", shortName="tranchesFile", doc="The output tranches file used by ApplyRecalibration", required=true)
@ -230,9 +229,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
            throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
        }

-        final VCFHeader vcfHeader = new VCFHeader();
-        recalWriter = VariantContextWriterFactory.create(recalFile, getMasterSequenceDictionary());
-        recalWriter.writeHeader(vcfHeader);
+        recalWriter.writeHeader( new VCFHeader() );
    }

    //---------------------------------------------------------------------------------------------------------------
--- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@ -101,6 +101,17 @@ public class BaseUtils {
        return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
    }

+    /**
+     * @return true iff the bases array contains at least one instance of base
+     */
+    static public boolean containsBase(final byte[] bases, final byte base) {
+        for ( final byte b : bases ) {
+            if ( b == base )
+                return true;
+        }
+        return false;
+    }
+
    /**
     * Converts a IUPAC nucleotide code to a pair of bases
     *
--- a/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java
@ -5,6 +5,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import java.io.ByteArrayOutputStream;
 import java.io.ObjectOutputStream;
 import java.util.BitSet;
+import java.util.HashMap;
+import java.util.Map;

 /**
 * Utilities for bitset conversion
@ -71,8 +73,15 @@ public class BitSetUtils {
     * @return a bitset representation of the short
     */
    public static BitSet bitSetFrom(short number) {
-        return bitSetFrom(number, NBITS_SHORT_REPRESENTATION);
+        BitSet result = shortCache.get(number);
+        if (result == null) {
+            result = bitSetFrom(number, NBITS_SHORT_REPRESENTATION);
+            shortCache.put(number, result);
+        }
+        return result;
    }
+    // use a static cache for shorts (but not for longs, because there could be a lot of entries)
+    private static final Map<Short, BitSet> shortCache = new HashMap<Short, BitSet>(2 * Short.MAX_VALUE);

    /**
     * Creates a BitSet representation of an arbitrary integer (number of bits capped at 64 -- long precision)
@ -82,7 +91,7 @@ public class BitSetUtils {
     * @return a bitset representation of the integer
     */
    public static BitSet bitSetFrom(long number, int nBits) {
-        BitSet bitSet = new BitSet();
+        BitSet bitSet = new BitSet(nBits);
        boolean isNegative = number < 0;
        int bitIndex = 0;
        while (number != 0) {
@ -130,32 +139,32 @@ public class BitSetUtils {
        if (number < 0)
            throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");

-        int length = contextLengthFor(number);  // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
-        number -= combinationsFor(length - 1);  // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
+        final int length = contextLengthFor(number);  // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
+        number -= combinationsFor(length - 1);        // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation

-        String dna = "";
+        StringBuilder dna = new StringBuilder();
        while (number > 0) {                    // perform a simple base_10 to base_4 conversion (quasi-canonical)
            byte base = (byte) (number % 4);
            switch (base) {
                case 0:
-                    dna = "A" + dna;
+                    dna.append('A');
                    break;
                case 1:
-                    dna = "C" + dna;
+                    dna.append('C');
                    break;
                case 2:
-                    dna = "G" + dna;
+                    dna.append('G');
                    break;
                case 3:
-                    dna = "T" + dna;
+                    dna.append('T');
                    break;
            }
            number /= 4;
        }
        for (int j = dna.length(); j < length; j++)
-            dna = "A" + dna;                    // add leading A's as necessary (due to the "quasi" canonical status, see description above)
+            dna.append('A');                          // add leading A's as necessary (due to the "quasi" canonical status, see description above)

-        return dna;
+        return dna.reverse().toString();              // make sure to reverse the string since we should have been pre-pending all along
    }

    /**
@ -178,27 +187,18 @@ public class BitSetUtils {
     * @return the bitset representing the dna sequence
     */
    public static BitSet bitSetFrom(String dna) {
-        if (dna.length() > MAX_DNA_CONTEXT)
-            throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length()));
+        return bitSetFrom(dna.getBytes());
+    }

-        long baseTen = 0;                                       // the number in base_10 that we are going to use to generate the bit set
-        long preContext = combinationsFor(dna.length() - 1);    // the sum of all combinations that preceded the length of the dna string
-        for (int i = 0; i < dna.length(); i++) {
+    public static BitSet bitSetFrom(final byte[] dna) {
+        if (dna.length > MAX_DNA_CONTEXT)
+            throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length));
+
+        final long preContext = combinationsFor(dna.length - 1);      // the sum of all combinations that preceded the length of the dna string
+        long baseTen = 0;                                             // the number in base_10 that we are going to use to generate the bit set
+        for (final byte base : dna) {
            baseTen *= 4;
-            switch (dna.charAt(i)) {
-                case 'A':
-                    baseTen += 0;
-                    break;
-                case 'C':
-                    baseTen += 1;
-                    break;
-                case 'G':
-                    baseTen += 2;
-                    break;
-                case 'T':
-                    baseTen += 3;
-                    break;
-            }
+            baseTen += BaseUtils.simpleBaseToBaseIndex(base);
        }
        return bitSetFrom(baseTen + preContext);                // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
    }
--- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.clipping;
 import com.google.java.contract.Requires;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
+import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -135,10 +136,9 @@ public class ReadClipper {
                ops.clear();
                if ( clippedRead.isEmpty() )
                    return GATKSAMRecord.emptyRead(clippedRead);
-//                    return new GATKSAMRecord( clippedRead.getHeader() );
                return clippedRead;
            } catch (CloneNotSupportedException e) {
-                throw new RuntimeException(e); // this should never happen
+                throw new RuntimeException(e);                                                                          // this should never happen
            }
        }
    }
@ -188,7 +188,6 @@ public class ReadClipper {
    private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
        if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
            return GATKSAMRecord.emptyRead(read);
-//            return new GATKSAMRecord(read.getHeader());

        this.addOp(new ClippingOp(start, stop));
        return clipRead(ClippingRepresentation.HARDCLIP_BASES);
@ -213,14 +212,12 @@ public class ReadClipper {
    private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
        if (read.isEmpty() || left == right)
            return GATKSAMRecord.emptyRead(read);
-//            return new GATKSAMRecord(read.getHeader());
        GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);

        // after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
        // make the left cut index no longer part of the read. In that case, clip the read entirely.
        if (left > leftTailRead.getAlignmentEnd())
            return GATKSAMRecord.emptyRead(read);
-//            return new GATKSAMRecord(read.getHeader());

        ReadClipper clipper = new ReadClipper(leftTailRead);
        return clipper.hardClipByReferenceCoordinatesLeftTail(left);
@ -402,17 +399,77 @@ public class ReadClipper {

    /**
     * Turns soft clipped bases into matches
-     *
     * @return a new read with every soft clip turned into a match
     */
    private GATKSAMRecord revertSoftClippedBases() {
-        this.addOp(new ClippingOp(0, 0));     // UNSOFTCLIP_BASES doesn't need coordinates
+        if (read.isEmpty())
+            return read;
+
+        this.addOp(new ClippingOp(0, 0));
        return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
    }
+
+    /**
+     * Reverts ALL soft-clipped bases
+     *
+     * @param read the read
+     * @return the read with all soft-clipped bases turned into matches
+     */
    public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
        return (new ReadClipper(read)).revertSoftClippedBases();
    }

+    /**
+     * Reverts only soft clipped bases with quality score greater than or equal to minQual
+     *
+     * Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR')
+     *
+     * @param read    the read
+     * @param minQual the mininum base quality score to revert the base (inclusive)
+     * @return the read with high quality soft clips reverted
+     */
+    public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read, byte minQual) {
+        int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart();
+        if (read.isEmpty() || nLeadingSoftClips > read.getReadLength())
+            return GATKSAMRecord.emptyRead(read);
+
+        byte [] quals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
+        int left = -1;
+
+        if (nLeadingSoftClips > 0) {
+            for (int i = nLeadingSoftClips - 1; i >= 0; i--) {
+                if (quals[i] >= minQual)
+                    left = i;
+                else
+                    break;
+            }
+        }
+
+        int right = -1;
+        int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd();
+        if (nTailingSoftClips > 0) {
+            for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength() ; i++) {
+                if (quals[i] >= minQual)
+                    right = i;
+                else
+                    break;
+            }
+        }
+
+        GATKSAMRecord clippedRead = read;
+        if (right >= 0) {
+            if (right + 1 < clippedRead.getReadLength())
+                clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1);           // first we hard clip the low quality soft clips on the left tail
+            clippedRead.setTemporaryAttribute("SR", nTailingSoftClips - (read.getReadLength() - right - 1));                // keep track of how may bases to 're-softclip' after processing
+        }
+        if (left >= 0) {
+            if (left - 1 > 0)
+                clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1);                                        // then we hard clip the low quality soft clips on the right tail
+            clippedRead.setTemporaryAttribute("SL", nLeadingSoftClips - left);                                          // keep track of how may bases to 're-softclip' after processing
+        }
+        return revertSoftClippedBases(clippedRead);                                                                     // now that we have only good bases in the soft clips, we can revert them all
+    }
+
    /**
     * Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
     * and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
@ -446,9 +503,6 @@ public class ReadClipper {
            stop = read.getReadLength() - 1;
        }

-//        if ((start == 0 && stop == read.getReadLength() - 1))
-//            return GATKSAMRecord.emptyRead(read);
-
        if (start < 0 || stop > read.getReadLength() - 1)
            throw new ReviewedStingException("Trying to clip before the start or after the end of a read");

--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/BaseRecalibration.java
@ -88,19 +88,18 @@ public class BaseRecalibration {
    public void recalibrateRead(final GATKSAMRecord read) {
        final ReadCovariates readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates);            // compute all covariates for the read
        for (final EventType errorModel : EventType.values()) {                                                         // recalibrate all three quality strings
-            final byte[] originalQuals = read.getBaseQualities(errorModel);
-            final byte[] recalQuals = originalQuals.clone();
+            final byte[] quals = read.getBaseQualities(errorModel);

            for (int offset = 0; offset < read.getReadLength(); offset++) {                                             // recalibrate all bases in the read
-                byte qualityScore = originalQuals[offset];
+                final byte originalQualityScore = quals[offset];

-                if (qualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) {                                                  // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
+                if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) {                                          // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
                    final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel);                               // get the keyset for this base using the error model
-                    qualityScore = performSequentialQualityCalculation(keySet, errorModel);                             // recalibrate the base
+                    final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel);      // recalibrate the base
+                    quals[offset] = recalibratedQualityScore;
                }
-                recalQuals[offset] = qualityScore;
            }
-            read.setBaseQualities(recalQuals, errorModel);
+            read.setBaseQualities(quals, errorModel);
        }
    }

--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@ -428,8 +428,8 @@ public class GATKSAMRecord extends BAMRecord {
     * Use this method if you want to create a new empty GATKSAMRecord based on
     * another GATKSAMRecord
     *
-     * @param read
-     * @return
+     * @param read a read to copy the header from
+     * @return a read with no bases but safe for the GATK
     */
    public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
        GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUnclippedStartWithNoTiesComparator.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUnclippedStartWithNoTiesComparator.java
@ -0,0 +1,48 @@
+package org.broadinstitute.sting.utils.sam;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import net.sf.samtools.SAMRecord;
+
+import java.util.Comparator;
+
+public class ReadUnclippedStartWithNoTiesComparator implements Comparator<SAMRecord> {
+    @Requires("c1 >= 0 && c2 >= 0")
+    @Ensures("result == 0 || result == 1 || result == -1")
+    private int compareContigs(int c1, int c2) {
+        if (c1 == c2)
+            return 0;
+        else if (c1 > c2)
+            return 1;
+        return -1;
+    }
+
+    @Requires("r1 != null && r2 != null")
+    @Ensures("result == 0 || result == 1 || result == -1")
+    public int compare(SAMRecord r1, SAMRecord r2) {
+        int result;
+
+        if (r1 == r2)
+            result = 0;
+
+        else if (r1.getReadUnmappedFlag())
+            result = 1;
+        else if (r2.getReadUnmappedFlag())
+            result = -1;
+        else {
+            final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
+
+            if (cmpContig != 0)
+                result = cmpContig;
+
+            else {
+                if (r1.getUnclippedStart() < r2.getUnclippedStart())
+                    result = -1;
+                else
+                    result = 1;
+            }
+        }
+
+        return result;
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java
@ -47,6 +47,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                        " -T VariantRecalibrator" +
                        " -input " + params.inVCF +
                        " -L 20:1,000,000-40,000,000" +
+                        " --no_cmdline_in_header" +
                        " -an QD -an HaplotypeScore -an HRun" +
                        " -percentBad 0.07" +
                        " --minNumBadVariants 0" +
@ -92,6 +93,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
                        " -T VariantRecalibrator" +
                        " -input " + params.inVCF +
                        " -L 20:1,000,000-40,000,000" +
+                        " --no_cmdline_in_header" +
                        " -an QD -an ReadPosRankSum -an HaplotypeScore" +
                        " -percentBad 0.08" +
                        " -mode INDEL -mG 3" +