Resolve stale merge changes
commit b123416c4c

@@ -1,5 +1,5 @@
 #!/bin/sh
-export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa"
+export BWA_HOME="/humgen/gsa-scr1/hanna/src/bio-bwa/bwa"
 export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux"
 export TARGET_LIB="libbwa.so"
 export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread"

@@ -233,6 +233,8 @@ void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_s
 void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; }
 void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; }
 void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; }
+void BWA::set_mode_nonstop() { options.mode |= BWA_MODE_NONSTOP; options.max_top2 = 0x7fffffff; }
+void BWA::set_max_entries_in_queue(int max_entries) { options.max_entries = max_entries; }
 
 /**
  * Create a sequence with a set of reasonable initial defaults.

@@ -60,6 +60,8 @@ class BWA {
   void set_mismatch_penalty(int penalty);
   void set_gap_open_penalty(int penalty);
   void set_gap_extension_penalty(int penalty);
+  void set_mode_nonstop();
+  void set_max_entries_in_queue(int max_entries);
 
   // Perform the alignment
   Alignment* generate_single_alignment(const char* bases,

@@ -8,11 +8,13 @@
 #include "bwa_gateway.h"
 #include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h"
 
+typedef void (BWA::*boolean_setter)();
 typedef void (BWA::*int_setter)(int value);
 typedef void (BWA::*float_setter)(float value);
 
 static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment);
 static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name);
+static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter);
 static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter);
 static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter);
 static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message);

@@ -100,6 +102,10 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner
   if(env->ExceptionCheck()) return;
   set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty);
   if(env->ExceptionCheck()) return;
+  set_boolean_configuration_param(env, configuration, "nonStopMode", bwa, &BWA::set_mode_nonstop);
+  if(env->ExceptionCheck()) return;
+  set_int_configuration_param(env, configuration, "maxEntriesInQueue", bwa, &BWA::set_max_entries_in_queue);
+  if(env->ExceptionCheck()) return;
 }
 
 JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases)

@@ -357,6 +363,36 @@ static jstring get_configuration_file(JNIEnv* env, jobject configuration, const
   return path;
 }
 
+static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter) {
+  jclass configuration_class = env->GetObjectClass(configuration);
+  if(configuration_class == NULL) return;
+
+  jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Boolean;");
+  if(configuration_field == NULL) return;
+
+  jobject boxed_value = env->GetObjectField(configuration,configuration_field);
+  if(env->ExceptionCheck()) return;
+
+  if(boxed_value != NULL) {
+    jclass boolean_box_class = env->FindClass("java/lang/Boolean");
+    if(boolean_box_class == NULL) return;
+
+    jmethodID boolean_extractor = env->GetMethodID(boolean_box_class,"booleanValue", "()Z");
+    if(boolean_extractor == NULL) return;
+
+    jboolean value = env->CallBooleanMethod(boxed_value,boolean_extractor);
+    if(env->ExceptionCheck()) return;
+
+    if(value)
+      (bwa->*setter)();
+
+    env->DeleteLocalRef(boolean_box_class);
+  }
+
+  env->DeleteLocalRef(boxed_value);
+  env->DeleteLocalRef(configuration_class);
+}
+
 static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) {
   jclass configuration_class = env->GetObjectClass(configuration);
   if(configuration_class == NULL) return;

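The new set_boolean_configuration_param follows the same convention as the existing int/float variants: a null Boolean field on the Java side means "leave the native default alone", and only an explicitly boxed value reaches the setter. A minimal pure-Java sketch of that boxed-optional pattern (the Config class and its fields here are illustrative stand-ins, not the repository's API):

    import java.lang.reflect.Field;

    public class OptionalConfigDemo {
        // Hypothetical configuration holder: a null wrapper field means "not set".
        static class Config {
            public Boolean nonStopMode = null;
            public Integer maxEntriesInQueue = null;
        }

        // Mirror of the JNI pattern: fetch the boxed field, skip it when null,
        // otherwise unbox and forward the value to the matching setter.
        static void apply(Config config) throws IllegalAccessException {
            for (Field field : Config.class.getFields()) {
                Object boxed = field.get(config);
                if (boxed != null)
                    System.out.println("would call native setter for " + field.getName() + " = " + boxed);
            }
        }

        public static void main(String[] args) throws IllegalAccessException {
            Config config = new Config();
            config.nonStopMode = true; // only the explicitly-set field is applied
            apply(config);
        }
    }
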
@@ -41,4 +41,14 @@ public class BWAConfiguration {
      * What is the scoring penalty for a gap extension?
      */
     public Integer gapExtensionPenalty = null;
+
+    /**
+     * Enter bwa's 'non-stop' mode (equivalent to bwa aln -N parameter).
+     */
+    public Boolean nonStopMode = false;
+
+    /**
+     * Set the max queue size that bwa will use when searching for matches (equivalent to bwa aln -m parameter).
+     */
+    public Integer maxEntriesInQueue = null;
 }

@@ -256,7 +256,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
         // Naive algorithm: find all elements in current contig for proper schedule creation.
         List<GenomeLoc> lociInContig = new LinkedList<GenomeLoc>();
         for(GenomeLoc locus: loci) {
-            if(dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded)
+            if(!GenomeLoc.isUnmapped(locus) && dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded)
                 lociInContig.add(locus);
         }

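The added isUnmapped guard matters because an unmapped locus has no contig entry in the sequence dictionary, so the getSequence(...) lookup would come back null and the chained getSequenceIndex() call would throw. A JDK-only sketch of the same short-circuit ordering (the Locus class and the map stand in for GATK's GenomeLoc and header; this is an illustration under those assumptions, not the scheduler's code):

    import java.util.HashMap;
    import java.util.LinkedList;
    import java.util.List;
    import java.util.Map;

    public class UnmappedGuardDemo {
        static class Locus {
            final String contig; final boolean unmapped;
            Locus(String contig, boolean unmapped) { this.contig = contig; this.unmapped = unmapped; }
        }

        public static void main(String[] args) {
            Map<String, Integer> contigIndex = new HashMap<>();
            contigIndex.put("chr1", 0);
            int lastReferenceSequenceLoaded = 0;
            List<Locus> inContig = new LinkedList<>();
            for (Locus locus : List.of(new Locus("chr1", false), new Locus(null, true))) {
                // the unmapped check must come first: && short-circuits before the
                // dictionary lookup can hand back null for a missing contig
                if (!locus.unmapped && contigIndex.get(locus.contig) == lastReferenceSequenceLoaded)
                    inContig.add(locus);
            }
            System.out.println(inContig.size()); // 1: the unmapped locus was skipped
        }
    }
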
@@ -88,7 +88,7 @@ public class GATKBAMIndex {
         seek(0);
         final byte[] buffer = readBytes(4);
         if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) {
-            throw new RuntimeException("Invalid file header in BAM index " + mFile +
+            throw new ReviewedStingException("Invalid file header in BAM index " + mFile +
                                        ": " + new String(buffer));
         }

@@ -112,7 +112,7 @@ public class GATKBAMIndex {
         openIndexFile();
 
         if (referenceSequence >= sequenceCount)
-            throw new ReviewedStingException("Invalid sequence number " + referenceSequence);
+            throw new ReviewedStingException("Invalid sequence number " + referenceSequence + " in index file " + mFile);
 
         skipToSequence(referenceSequence);

@@ -183,12 +183,12 @@ public class GATKBAMIndex {
     public int getLevelForBin(final Bin bin) {
         GATKBin gatkBin = new GATKBin(bin);
         if(gatkBin.getBinNumber() >= MAX_BINS)
-            throw new SAMException("Tried to get level for invalid bin.");
+            throw new ReviewedStingException("Tried to get level for invalid bin in index file " + mFile);
         for(int i = getNumIndexLevels()-1; i >= 0; i--) {
             if(gatkBin.getBinNumber() >= LEVEL_STARTS[i])
                 return i;
         }
-        throw new SAMException("Unable to find correct bin for bin "+bin);
+        throw new ReviewedStingException("Unable to find correct bin for bin " + bin + " in index file " + mFile);
     }
 
     /**

@@ -352,7 +352,7 @@ public class GATKBAMIndex {
             fileChannel.read(buffer);
         }
         catch(IOException ex) {
-            throw new ReviewedStingException("Index: unable to read bytes from index file.");
+            throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile);
         }
     }
 
@@ -379,7 +379,7 @@ public class GATKBAMIndex {
             fileChannel.position(fileChannel.position() + count);
         }
         catch(IOException ex) {
-            throw new ReviewedStingException("Index: unable to reposition file channel.");
+            throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile);
         }
     }
 
@@ -388,7 +388,7 @@ public class GATKBAMIndex {
             fileChannel.position(position);
         }
         catch(IOException ex) {
-            throw new ReviewedStingException("Index: unable to reposition of file channel.");
+            throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile);
         }
     }

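The common thread in these GATKBAMIndex changes is that every failure message now carries the offending index path (mFile), which is the difference between a diagnosable field report and a dead-end stack trace when a job dies on one of thousands of indices. A small freestanding helper in the same spirit (not GATK API; RuntimeException stands in for ReviewedStingException):

    import java.io.File;
    import java.io.IOException;

    public class IndexErrorDemo {
        // Hypothetical helper: attach the offending file to every index error message.
        static RuntimeException indexError(String message, File indexFile, Throwable cause) {
            return new RuntimeException(message + " in index file " + indexFile, cause);
        }

        public static void main(String[] args) {
            File index = new File("reads.bam.bai");
            try {
                throw new IOException("simulated read failure");
            } catch (IOException ex) {
                System.out.println(indexError("Index: unable to read bytes", index, ex).getMessage());
            }
        }
    }
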
@@ -147,13 +147,13 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                 ActiveRegion bestRegion = activeRegion;
                 for( final ActiveRegion otherRegionToTest : workQueue ) {
                     if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
-                        maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap(readLoc);
+                        maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( readLoc );
                         bestRegion = otherRegionToTest;
                     }
                 }
                 bestRegion.add( (GATKSAMRecord) read, true );
 
-                // The read is also added to all other region in which it overlaps but marked as non-primary
+                // The read is also added to all other regions in which it overlaps but marked as non-primary
                 if( !bestRegion.equals(activeRegion) ) {
                     activeRegion.add( (GATKSAMRecord) read, false );
                 }

@@ -30,18 +30,19 @@ import net.sf.samtools.SAMReadGroupRecord;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Output;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.baq.BAQ;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.io.File;
 import java.util.Collection;
+import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
 
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
-import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-
 /**
  * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
  *

@@ -70,12 +71,21 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
  * -I input2.bam \
  * --read_filter MappingQualityZero
  *
+ * // Prints the first 2000 reads in the BAM file
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T PrintReads \
+ *   -o output.bam \
+ *   -I input.bam \
+ *   -n 2000
+ *
+ * // Downsamples BAM file to 25%
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T PrintReads \
+ *   -o output.bam \
+ *   -I input.bam \
+ *   -ds 0.25
 * </pre>
 *
 */

@@ -95,9 +105,18 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
     @Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
     String platform = null;
 
+    /**
+     * Only prints the first n reads of the file
+     */
+    @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
+    int nReadsToPrint = -1;
+
+    /**
+     * Downsamples the bam file by the given ratio, printing only approximately the given percentage of reads. The downsampling is balanced (over the entire coverage)
+     */
+    @Argument(fullName = "downsample_coverage", shortName = "ds", doc="Downsample BAM to desired coverage", required = false)
+    public double downsampleRatio = 1.0;
+
     /**
      * Only reads from samples listed in the provided file(s) will be included in the output.
      */

@@ -112,6 +131,8 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
 
     private TreeSet<String> samplesToChoose = new TreeSet<String>();
     private boolean SAMPLES_SPECIFIED = false;
+
+    Random random;
 
     /**
      * The initialize function.

@@ -132,13 +153,15 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
         if(!samplesToChoose.isEmpty()) {
             SAMPLES_SPECIFIED = true;
         }
+
+        random = GenomeAnalysisEngine.getRandomGenerator();
 
     }
 
     /**
      * The reads filter function.
      *
-     * @param ref the reference bases that correspond to our read, if a reference was provided
+     * @param ref the reference bases that correspond to our read, if a reference was provided
      * @param read the read itself, as a SAMRecord
      * @return true if the read passes the filter, false if it doesn't
      */

@@ -177,13 +200,14 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
                 nReadsToPrint--; // n > 0 means there are still reads to be printed.
             }
 
             return true;
         }
+        // if downsample option is turned off (= 1) then don't waste time getting the next random number.
+        return (downsampleRatio == 1 || random.nextDouble() < downsampleRatio);
     }
 
     /**
      * The reads map function.
      *
-     * @param ref the reference bases that correspond to our read, if a reference was provided
+     * @param ref the reference bases that correspond to our read, if a reference was provided
      * @param read the read itself, as a SAMRecord
      * @return the read itself
      */

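The new filter branch is one Bernoulli trial per read, with the ratio == 1 comparison short-circuiting so the generator is never consulted when downsampling is off; pulling the Random from GenomeAnalysisEngine keeps runs reproducible under the engine's seed. A self-contained sketch of the same logic (the seed and counts are arbitrary):

    import java.util.Random;

    public class DownsampleDemo {
        public static void main(String[] args) {
            final Random random = new Random(42); // one shared, seeded generator
            final double downsampleRatio = 0.25;
            final int total = 100000;
            int kept = 0;
            for (int i = 0; i < total; i++) {
                // ratio == 1 short-circuits, so no random number is drawn when off
                if (downsampleRatio == 1 || random.nextDouble() < downsampleRatio)
                    kept++;
            }
            System.out.printf("kept %d of %d (~%.3f)%n", kept, total, kept / (double) total);
        }
    }
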
@@ -194,6 +218,7 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
     /**
      * reduceInit is called once before any calls to the map function. We use it here to setup the output
      * bam file, if it was specified on the command line
+     *
      * @return SAMFileWriter, set to the BAM output file if the command line option was set, null otherwise
      */
     public SAMFileWriter reduceInit() {

@@ -202,7 +227,8 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
 
     /**
      * given a read and a output location, reduce by emitting the read
-     * @param read the read itself
+     *
+     * @param read the read itself
      * @param output the output source
      * @return the SAMFileWriter, so that the next reduce can emit to the same source
      */

@@ -8,7 +8,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.MendelianViolation;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
-import org.broadinstitute.sting.utils.exceptions.UserException;

@@ -18,7 +17,7 @@ import java.util.*;
 
 /**
  * Created by IntelliJ IDEA.
- * User: rpoplin
+ * User: rpoplin, lfran
  * Date: 11/14/11
 */
 

@@ -28,6 +27,7 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
     private final static int REF = 0;
     private final static int HET = 1;
     private final static int HOM = 2;
+    private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
 
     public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
         if ( trios == null ) {

@@ -50,7 +50,9 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen
             }
         }
 
-        toRet.put("TDT", calculateTDT( vc, triosToTest ));
+        if( triosToTest.size() >= MIN_NUM_VALID_TRIOS ) {
+            toRet.put("TDT", calculateTDT( vc, triosToTest ));
+        }
 
         return toRet;
     }

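The behavioral change: rather than always emitting TDT, the annotation is withheld when fewer than MIN_NUM_VALID_TRIOS informative trios are available, since the statistic is population-level and unstable on tiny samples. The shape of the guard, reduced to JDK types (the placeholder value stands in for calculateTDT):

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class MinTriosGateDemo {
        private static final int MIN_NUM_VALID_TRIOS = 5;

        public static void main(String[] args) {
            List<String> triosToTest = List.of("trioA", "trioB"); // only 2 informative trios
            Map<String, Object> toRet = new HashMap<>();
            if (triosToTest.size() >= MIN_NUM_VALID_TRIOS)
                toRet.put("TDT", 0.0); // placeholder for calculateTDT(vc, triosToTest)
            System.out.println(toRet); // {}: the annotation is withheld, not fabricated
        }
    }
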
@@ -35,7 +35,7 @@ import java.util.*;
 
 public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
 
-    private final static boolean DEBUG = false;
+    // private final static boolean DEBUG = false;
 
     private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
 

@@ -177,12 +177,18 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
         ACqueue.add(zeroSet);
         indexesToACset.put(zeroSet.ACcounts, zeroSet);
 
+        // optimization: create the temporary storage for computing L(j,k) just once
+        final int maxPossibleDependencies = numAlternateAlleles + (numAlternateAlleles * (numAlternateAlleles + 1) / 2) + 1;
+        final double[][] tempLog10ConformationLikelihoods = new double[numSamples+1][maxPossibleDependencies];
+        for ( int i = 0; i < maxPossibleDependencies; i++ )
+            tempLog10ConformationLikelihoods[0][i] = Double.NEGATIVE_INFINITY;
+
         // keep processing while we have AC conformations that need to be calculated
         double maxLog10L = Double.NEGATIVE_INFINITY;
         while ( !ACqueue.isEmpty() ) {
             // compute log10Likelihoods
             final ExactACset set = ACqueue.remove();
-            final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
+            final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods);
 
             // adjust max likelihood seen if needed
             maxLog10L = Math.max(maxLog10L, log10LofKs);

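The optimization is scratch-buffer hoisting: the per-cell likelihood array that the inner computation used to allocate on every pass is now sized once for the worst case (one AA path plus every possible dependent conformation) and reused across the whole queue walk. The allocation pattern in isolation, as a sketch:

    import java.util.Arrays;

    public class ScratchBufferDemo {
        public static void main(String[] args) {
            final int iterations = 1000, width = 64;

            // before: a fresh array per iteration, garbage proportional to queue length
            for (int i = 0; i < iterations; i++) {
                double[] scratch = new double[width];
                scratch[0] = i;
            }

            // after: allocate once at the worst-case width, overwrite in place
            final double[] scratch = new double[width];
            for (int i = 0; i < iterations; i++) {
                Arrays.fill(scratch, Double.NEGATIVE_INFINITY); // reset stale entries
                scratch[0] = i;
            }
            System.out.println("done");
        }
    }
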
@@ -197,20 +203,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                                                          final Queue<ExactACset> ACqueue,
                                                          final HashMap<ExactACcounts, ExactACset> indexesToACset,
                                                          final double[][] log10AlleleFrequencyPriors,
-                                                         final AlleleFrequencyCalculationResult result) {
+                                                         final AlleleFrequencyCalculationResult result,
+                                                         final double[][] tempLog10ConformationLikelihoods) {
 
-        if ( DEBUG )
-            System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
+        //if ( DEBUG )
+        //    System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
 
         // compute the log10Likelihoods
-        computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result);
+        computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result, tempLog10ConformationLikelihoods);
 
         // clean up memory
         if ( !preserveData ) {
             for ( ExactACcounts index : set.dependentACsetsToDelete ) {
                 indexesToACset.put(index, null);
-                if ( DEBUG )
-                    System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
+                //if ( DEBUG )
+                //    System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts);
             }
         }

@@ -218,8 +225,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
 
         // can we abort early because the log10Likelihoods are so small?
         if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
-            if ( DEBUG )
-                System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
+            //if ( DEBUG )
+            //    System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
 
             // no reason to keep this data around because nothing depends on it
             if ( !preserveData )

@@ -262,8 +269,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
         // if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate
         // over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early)
         if ( !preserveData && lastSet == null ) {
-            if ( DEBUG )
-                System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
+            //if ( DEBUG )
+            //    System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts);
             lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue);
         }
         if ( lastSet != null )

@@ -309,7 +316,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                                 final ArrayList<double[]> genotypeLikelihoods,
                                 final HashMap<ExactACcounts, ExactACset> indexesToACset,
                                 final double[][] log10AlleleFrequencyPriors,
-                                final AlleleFrequencyCalculationResult result) {
+                                final AlleleFrequencyCalculationResult result,
+                                final double[][] tempLog10ConformationLikelihoods) {
 
         set.log10Likelihoods[0] = 0.0; // the zero case
         final int totalK = set.getACsum();

@@ -321,38 +329,41 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
         }
         // k > 0 for at least one k
         else {
-            // all possible likelihoods for a given cell from which to choose the max
-            final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
-            final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it
+            // deal with the non-AA possible conformations
+            int conformationIndex = 1;
+            for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
+                //if ( DEBUG )
+                //    System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
 
-            for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
-                final double[] gl = genotypeLikelihoods.get(j);
-                final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
+                ExactACset dependent = indexesToACset.get(mapping.getKey());
 
-                // initialize
-                for ( int i = 0; i < numPaths; i++ )
-                    // TODO -- Arrays.fill?
-                    // todo -- is this even necessary? Why not have as else below?
-                    log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY;
+                for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
+                    if ( totalK <= 2*j ) { // skip impossible conformations
+                        final double[] gl = genotypeLikelihoods.get(j);
+                        tempLog10ConformationLikelihoods[j][conformationIndex] =
+                                determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + dependent.log10Likelihoods[j-1] + gl[mapping.getValue()];
+                    } else {
+                        tempLog10ConformationLikelihoods[j][conformationIndex] = Double.NEGATIVE_INFINITY;
+                    }
+                }
 
-                // deal with the AA case first
-                if ( totalK < 2*j-1 )
-                    log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
+                conformationIndex++;
+            }
 
-                // deal with the other possible conformations now
-                if ( totalK <= 2*j ) { // skip impossible conformations
-                    int conformationIndex = 1;
-                    for ( Map.Entry<ExactACcounts, Integer> mapping : set.ACsetIndexToPLIndex.entrySet() ) {
-                        if ( DEBUG )
-                            System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey());
-                        log10ConformationLikelihoods[conformationIndex++] =
-                                determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()];
-                    }
-                }
+            // finally, deal with the AA case (which depends on previous cells in this column) and then update the L(j,k) value
+            final int numPaths = set.ACsetIndexToPLIndex.size() + 1;
+            for ( int j = 1; j < set.log10Likelihoods.length; j++ ) {
+                if ( totalK < 2*j-1 ) {
+                    final double[] gl = genotypeLikelihoods.get(j);
+                    tempLog10ConformationLikelihoods[j][0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX];
+                } else {
+                    tempLog10ConformationLikelihoods[j][0] = Double.NEGATIVE_INFINITY;
+                }
 
-                final double log10Max = MathUtils.approximateLog10SumLog10(log10ConformationLikelihoods);
-
-                // finally, update the L(j,k) value
+                final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
+                final double log10Max = MathUtils.approximateLog10SumLog10(tempLog10ConformationLikelihoods[j], numPaths);
                 set.log10Likelihoods[j] = log10Max - logDenominator;
             }
         }

@@ -523,7 +534,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
             lastK = k;
             maxLog10L = Math.max(maxLog10L, log10LofK);
             if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
-                if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
+                //if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
                 done = true;
             }

@@ -858,171 +858,4 @@ public class UnifiedGenotyperEngine {
 
         return calls;
     }
-
-    /**
-     * @param vc variant context with genotype likelihoods
-     * @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use
-     * @param exactAC integer array describing the AC from the exact model for the corresponding alleles
-     * @return genotypes
-     */
-    public static GenotypesContext constrainedAssignGenotypes(VariantContext vc, boolean[] allelesToUse, int[] exactAC ) {
-
-        final GenotypesContext GLs = vc.getGenotypes();
-
-        // samples
-        final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();
-
-        // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
-        final int numOriginalAltAlleles = allelesToUse.length;
-        final List<Allele> newAlleles = new ArrayList<Allele>(numOriginalAltAlleles+1);
-        newAlleles.add(vc.getReference());
-        final HashMap<Allele,Integer> alleleIndexMap = new HashMap<Allele,Integer>(); // need this for skipping dimensions
-        int[] alleleCount = new int[exactAC.length];
-        for ( int i = 0; i < numOriginalAltAlleles; i++ ) {
-            if ( allelesToUse[i] ) {
-                newAlleles.add(vc.getAlternateAllele(i));
-                alleleIndexMap.put(vc.getAlternateAllele(i),i);
-                alleleCount[i] = exactAC[i];
-            } else {
-                alleleCount[i] = 0;
-            }
-        }
-        final List<Allele> newAltAlleles = newAlleles.subList(1,newAlleles.size());
-        final int numNewAltAlleles = newAltAlleles.size();
-        ArrayList<Integer> likelihoodIndexesToUse = null;
-
-        // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
-        // then we can keep the PLs as is; otherwise, we determine which ones to keep
-        final int[][] PLcache;
-        if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
-            likelihoodIndexesToUse = new ArrayList<Integer>(30);
-            PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
-
-            for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) {
-                int[] alleles = PLcache[PLindex];
-                // consider this entry only if both of the alleles are good
-                if ( (alleles[0] == 0 || allelesToUse[alleles[0] - 1]) && (alleles[1] == 0 || allelesToUse[alleles[1] - 1]) )
-                    likelihoodIndexesToUse.add(PLindex);
-            }
-        } else {
-            PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
-        }
-
-        // set up the trellis dimensions
-        // SAMPLE x alt 1 x alt 2 x alt 3
-        // todo -- check that exactAC has alt counts at [1],[2],[3] (and not [0],[1],[2])
-        double[][][][] transitionTrellis = new double[sampleIndices.size()+1][exactAC[1]][exactAC[2]][exactAC[3]];
-        // N x AC1 x AC2 x AC3; worst performance in multi-allelic where all alleles are moderate frequency
-        // capped at the MLE ACs*
-        // todo -- there's an optimization: not all states in the rectangular matrix will be reached, in fact
-        // todo -- for tT[0] we only care about tT[0][0][0][0], and for tT[1], only combinations of 0,1,2.
-        int idx = 1; // index of which sample we're on
-        int prevMaxState = 0; // the maximum state (e.g. AC) reached by the previous sample. Symmetric. (AC capping handled by logic in loop)
-        // iterate over each sample
-        for ( String sample : sampleIndices ) {
-            // push the likelihoods into the next possible states, that is to say
-            // L[state] = L[prev state] + L[genotype getting into state]
-            // iterate over each previous state, by dimension
-            // and contribute the likelihoods for transitions to this state
-            double[][][] prevState = transitionTrellis[idx-1];
-            double[][][] thisState = transitionTrellis[idx];
-            Genotype genotype = GLs.get(sample);
-            if ( genotype.isNoCall() || genotype.isFiltered() ) {
-                thisState = prevState.clone();
-            } else {
-                double[] likelihoods = genotype.getLikelihoods().getAsVector();
-                int dim1min = Math.max(0, alleleCount[0]-2*(sampleIndices.size()-idx+1));
-                int dim1max = Math.min(prevMaxState,alleleCount[0]);
-                int dim2min = Math.max(0,alleleCount[1]-2*(sampleIndices.size()-idx+1));
-                int dim2max = Math.min(prevMaxState,alleleCount[1]);
-                int dim3min = Math.max(0,alleleCount[2]-2*(sampleIndices.size()-idx+1));
-                int dim3max = Math.min(prevMaxState,alleleCount[2]);
-                // cue annoying nested for loop
-                for ( int a1 = dim1min ; a1 <= dim1max; a1++ ) {
-                    for ( int a2 = dim2min; a2 <= dim2max; a2++ ) {
-                        for ( int a3 = dim3min; a3 <= dim3max; a3++ ) {
-                            double base = prevState[a1][a2][a3];
-                            for ( int likIdx : likelihoodIndexesToUse ) {
-                                int[] offsets = calculateOffsets(PLcache[likIdx]);
-                                thisState[a1+offsets[1]][a2+offsets[2]][a3+offsets[3]] = base + likelihoods[likIdx];
-                            }
-                        }
-                    }
-                }
-                prevMaxState += 2;
-            }
-            idx++;
-        }
-
-        // after all that pain, we have a fully calculated trellis. Now just march backwards from the EAC state and
-        // assign genotypes along the greedy path
-
-        GenotypesContext calls = GenotypesContext.create(sampleIndices.size());
-        int[] state = alleleCount;
-        for ( String sample : Utils.reverse(sampleIndices) ) {
-            --idx;
-            // the next state will be the maximum achievable state
-            Genotype g = GLs.get(sample);
-            if ( g.isNoCall() || ! g.hasLikelihoods() ) {
-                calls.add(g);
-                continue;
-            }
-
-            // subset to the new likelihoods. These are not used except for subsetting in the context iself.
-            // i.e. they are not a part of the calculation.
-            final double[] originalLikelihoods = GLs.get(sample).getLikelihoods().getAsVector();
-            double[] newLikelihoods;
-            if ( likelihoodIndexesToUse == null ) {
-                newLikelihoods = originalLikelihoods;
-            } else {
-                newLikelihoods = new double[likelihoodIndexesToUse.size()];
-                int newIndex = 0;
-                for ( int oldIndex : likelihoodIndexesToUse )
-                    newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];
-
-                // might need to re-normalize
-                newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
-            }
-
-            // todo -- alter this. For ease of programming, likelihood indeces are
-            // todo -- used to iterate over achievable states.
-            double max = Double.NEGATIVE_INFINITY;
-            int[] bestState = null;
-            int[] bestAlleles = null;
-            int bestLikIdx = -1;
-            for ( int likIdx : likelihoodIndexesToUse ) {
-                int[] offsets = calculateOffsets(PLcache[likIdx]);
-                double val = transitionTrellis[idx-1][state[0]-offsets[0]][state[1]-offsets[1]][state[2]-offsets[2]];
-                if ( val > max ) {
-                    max = val;
-                    bestState = new int[] { state[0]-offsets[0],state[1]-offsets[1],state[2]-offsets[2]};
-                    bestAlleles = PLcache[likIdx];
-                    bestLikIdx = likIdx;
-                }
-            }
-            state = bestState;
-            List<Allele> gtAlleles = new ArrayList<Allele>(2);
-            gtAlleles.add(newAlleles.get(bestAlleles[0]));
-            gtAlleles.add(newAlleles.get(bestAlleles[1]));
-
-            final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(bestLikIdx, newLikelihoods);
-            Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
-            if ( numNewAltAlleles == 0 )
-                attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
-            else
-                attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
-            calls.add(new Genotype(sample, gtAlleles, qual, null, attrs, false));
-
-        }
-        return calls;
-    }
-
-    private static int[] calculateOffsets(int[] alleleIndeces) {
-        int[] offsets = new int[4];
-        for ( int i = 0; i < alleleIndeces.length; i++ ) {
-            offsets[alleleIndeces[i]]++;
-        }
-
-        return offsets;
-    }
 }

@@ -63,14 +63,18 @@ public class MathUtils {
         return (d > 0) ? (int)(d + 0.5d) : (int)(d - 0.5d);
     }
 
-    public static double approximateLog10SumLog10(double[] vals) {
+    public static double approximateLog10SumLog10(final double[] vals) {
+        return approximateLog10SumLog10(vals, vals.length);
+    }
 
-        final int maxElementIndex = MathUtils.maxElementIndex(vals);
+    public static double approximateLog10SumLog10(final double[] vals, final int endIndex) {
+
+        final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex);
         double approxSum = vals[maxElementIndex];
         if ( approxSum == Double.NEGATIVE_INFINITY )
             return approxSum;
 
-        for ( int i = 0; i < vals.length; i++ ) {
+        for ( int i = 0; i < endIndex; i++ ) {
             if ( i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY )
                 continue;
 

@@ -582,11 +586,15 @@ public class MathUtils {
         return normalizeFromLog10(array, false);
     }
 
-    public static int maxElementIndex(double[] array) {
+    public static int maxElementIndex(final double[] array) {
+        return maxElementIndex(array, array.length);
+    }
+
+    public static int maxElementIndex(final double[] array, final int endIndex) {
         if (array == null) throw new IllegalArgumentException("Array cannot be null!");
 
         int maxI = -1;
-        for (int i = 0; i < array.length; i++) {
+        for (int i = 0; i < endIndex; i++) {
             if (maxI == -1 || array[i] > array[maxI])
                 maxI = i;
         }

@@ -594,11 +602,15 @@ public class MathUtils {
         return maxI;
     }
 
-    public static int maxElementIndex(int[] array) {
+    public static int maxElementIndex(final int[] array) {
+        return maxElementIndex(array, array.length);
+    }
+
+    public static int maxElementIndex(final int[] array, int endIndex) {
         if (array == null) throw new IllegalArgumentException("Array cannot be null!");
 
         int maxI = -1;
-        for (int i = 0; i < array.length; i++) {
+        for (int i = 0; i < endIndex; i++) {
            if (maxI == -1 || array[i] > array[maxI])
                maxI = i;
        }

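The endIndex overloads let a caller run the reduction over just the first k slots of a longer reused buffer, which is exactly what the tempLog10ConformationLikelihoods change above requires. A standalone (exact, non-cached) equivalent of summing log10-scale values over a prefix:

    public class Log10SumDemo {
        // log10(sum over i < endIndex of 10^vals[i]), via the usual max-factoring trick
        static double log10SumLog10(final double[] vals, final int endIndex) {
            int maxElementIndex = 0;
            for (int i = 1; i < endIndex; i++)
                if (vals[i] > vals[maxElementIndex]) maxElementIndex = i;
            final double max = vals[maxElementIndex];
            if (max == Double.NEGATIVE_INFINITY) return max;
            double sum = 1.0;
            for (int i = 0; i < endIndex; i++) {
                if (i == maxElementIndex || vals[i] == Double.NEGATIVE_INFINITY) continue;
                sum += Math.pow(10.0, vals[i] - max);
            }
            return max + Math.log10(sum);
        }

        public static void main(String[] args) {
            double[] buffer = { -1.0, -2.0, -3.0, 9999.0 }; // last slot is stale scratch
            System.out.println(log10SumLog10(buffer, 3));   // the stale tail is never touched
        }
    }
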
@@ -18,6 +18,7 @@ import java.util.zip.GZIPInputStream;
 
 
 public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
+    public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20);
 
     protected final static Logger log = Logger.getLogger(VCFCodec.class);
     protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column

@@ -252,7 +253,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
 
         // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data)
         if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) ||
-                (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
+            (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) )
             throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) +
                                                  " tokens, and saw " + nParts + " )", lineNo);
 

@@ -518,8 +519,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
      * @param lineNo the line number for this record
      */
     private static void checkAllele(String allele, boolean isRef, int lineNo) {
-        if ( allele == null || allele.length() == 0 )
-            generateException("Empty alleles are not permitted in VCF records", lineNo);
+        if ( allele == null || allele.length() == 0 )
+            generateException("Empty alleles are not permitted in VCF records", lineNo);
+
+        if ( MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING )
+            log.warn(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
 
         if ( isSymbolicAllele(allele) ) {
             if ( isRef ) {

@@ -572,12 +576,13 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
 
     public static int computeForwardClipping(List<Allele> unclippedAlleles, String ref) {
         boolean clipping = true;
+        final byte ref0 = (byte)ref.charAt(0);
 
         for ( Allele a : unclippedAlleles ) {
             if ( a.isSymbolic() )
                 continue;
 
-            if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) {
+            if ( a.length() < 1 || (a.getBases()[0] != ref0) ) {
                 clipping = false;
                 break;
             }

@@ -604,7 +609,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
                 stillClipping = false;
             else if ( ref.length() == clipping )
                 generateException("bad alleles encountered", lineNo);
-            else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] )
+            else if ( a.getBases()[a.length()-clipping-1] != ((byte)ref.charAt(ref.length()-clipping-1)) )
                 stillClipping = false;
         }
         if ( stillClipping )

@@ -613,6 +618,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
 
         return clipping;
     }
+
     /**
      * clip the alleles, based on the reference
      *

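Besides the new oversized-allele warning, the clipping fix replaces ref.getBytes()[0], which copies the entire reference string into a fresh byte array on every comparison, with a first byte cached once via charAt. The difference in miniature (the byte cast is safe here because VCF bases are ASCII):

    import java.util.List;

    public class FirstBaseDemo {
        public static void main(String[] args) {
            final String ref = "ACGT";
            final List<String> alleles = List.of("A", "AC", "AG");

            // cached once: no per-allele String-to-byte[] copy
            final byte ref0 = (byte) ref.charAt(0);

            boolean clipping = true;
            for (String a : alleles) {
                if (a.isEmpty() || (byte) a.charAt(0) != ref0) {
                    clipping = false;
                    break;
                }
            }
            System.out.println("all alleles share the leading reference base: " + clipping);
        }
    }
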
@@ -27,10 +27,8 @@ package org.broadinstitute.sting.utils.codecs.vcf;
 
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
-import java.util.Comparator;
-import java.util.PriorityQueue;
-import java.util.Set;
-import java.util.TreeSet;
+import java.util.*;
+import java.util.concurrent.PriorityBlockingQueue;
 
 /**
  * This class writes VCF files, allowing records to be passed in unsorted.

@@ -39,20 +37,26 @@ import java.util.TreeSet;
 public abstract class SortingVCFWriterBase implements VCFWriter {
 
     // The VCFWriter to which to actually write the sorted VCF records
-    private VCFWriter innerWriter = null;
+    private final VCFWriter innerWriter;
 
     // the current queue of un-emitted records
-    private PriorityQueue<VCFRecord> queue = null;
+    private final Queue<VCFRecord> queue;
 
     // The locus until which we are permitted to write out (inclusive)
     protected Integer mostUpstreamWritableLoc;
     protected static final int BEFORE_MOST_UPSTREAM_LOC = 0; // No real locus index is <= 0
 
     // The set of chromosomes already passed over and to which it is forbidden to return
-    private Set<String> finishedChromosomes = null;
+    private final Set<String> finishedChromosomes;
 
     // Should we call innerWriter.close() in close()
-    private boolean takeOwnershipOfInner;
+    private final boolean takeOwnershipOfInner;
 
+    // --------------------------------------------------------------------------------
+    //
+    // Constructors
+    //
+    // --------------------------------------------------------------------------------
+
     /**
      * create a local-sorting VCF writer, given an inner VCF writer to write to

@@ -62,16 +66,27 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
      */
     public SortingVCFWriterBase(VCFWriter innerWriter, boolean takeOwnershipOfInner) {
         this.innerWriter = innerWriter;
-        this.queue = new PriorityQueue<VCFRecord>(50, new VariantContextComparator());
-        this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
         this.finishedChromosomes = new TreeSet<String>();
         this.takeOwnershipOfInner = takeOwnershipOfInner;
+
+        // has to be PriorityBlockingQueue to be thread-safe
+        // see http://getsatisfaction.com/gsa/topics/missing_loci_output_in_multi_thread_mode_when_implement_sortingvcfwriterbase?utm_content=topic_link&utm_medium=email&utm_source=new_topic
+        this.queue = new PriorityBlockingQueue<VCFRecord>(50, new VariantContextComparator());
+
+        this.mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
     }
 
     public SortingVCFWriterBase(VCFWriter innerWriter) {
         this(innerWriter, false); // by default, don't own inner
     }
 
+    // --------------------------------------------------------------------------------
+    //
+    // public interface functions
+    //
+    // --------------------------------------------------------------------------------
+
+    @Override
     public void writeHeader(VCFHeader header) {
         innerWriter.writeHeader(header);
     }

@@ -79,6 +94,7 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
     /**
      * attempt to close the VCF file; we need to flush the queue first
      */
+    @Override
     public void close() {
         stopWaitingToSort();
 

@@ -86,27 +102,14 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
             innerWriter.close();
     }
 
-    private void stopWaitingToSort() {
-        emitRecords(true);
-        mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
-    }
-
-    protected void emitSafeRecords() {
-        emitRecords(false);
-    }
-
-    protected void noteCurrentRecord(VariantContext vc) {
-        // did the user break the contract by giving a record too late?
-        if (mostUpstreamWritableLoc != null && vc.getStart() < mostUpstreamWritableLoc) // went too far back, since may have already written anything that is <= mostUpstreamWritableLoc
-            throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getChr() + ":" + vc.getStart() + " was just added.");
-    }
-
     /**
      * add a record to the file
      *
      * @param vc the Variant Context object
      */
-    public void add(VariantContext vc) {
+    @Override
+    public synchronized void add(VariantContext vc) {
         /* Note that the code below does not prevent the successive add()-ing of: (chr1, 10), (chr20, 200), (chr15, 100)
            since there is no implicit ordering of chromosomes:
         */

@@ -125,7 +128,43 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
             emitSafeRecords();
     }
 
-    private void emitRecords(boolean emitUnsafe) {
+    /**
+     * Gets a string representation of this object.
+     * @return a string representation of this object
+     */
+    @Override
+    public String toString() {
+        return getClass().getName();
+    }
+
+    // --------------------------------------------------------------------------------
+    //
+    // protected interface functions for subclasses to use
+    //
+    // --------------------------------------------------------------------------------
+
+    private synchronized void stopWaitingToSort() {
+        emitRecords(true);
+        mostUpstreamWritableLoc = BEFORE_MOST_UPSTREAM_LOC;
+    }
+
+    protected synchronized void emitSafeRecords() {
+        emitRecords(false);
+    }
+
+    protected void noteCurrentRecord(VariantContext vc) {
+        // did the user break the contract by giving a record too late?
+        if (mostUpstreamWritableLoc != null && vc.getStart() < mostUpstreamWritableLoc) // went too far back, since may have already written anything that is <= mostUpstreamWritableLoc
+            throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getChr() + ":" + vc.getStart() + " was just added.");
+    }
+
+    // --------------------------------------------------------------------------------
+    //
+    // private implementation functions
+    //
+    // --------------------------------------------------------------------------------
+
+    private synchronized void emitRecords(boolean emitUnsafe) {
         while (!queue.isEmpty()) {
             VCFRecord firstRec = queue.peek();
 

@@ -140,15 +179,6 @@ public abstract class SortingVCFWriterBase implements VCFWriter {
         }
     }
 
-    /**
-     * Gets a string representation of this object.
-     * @return a string representation of this object
-     */
-    @Override
-    public String toString() {
-        return getClass().getName();
-    }
-
     private static class VariantContextComparator implements Comparator<VCFRecord> {
         public int compare(VCFRecord r1, VCFRecord r2) {
             return r1.vc.getStart() - r2.vc.getStart();

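Two coordinated changes make the sorting writer safe under the multi-threaded traversal described in the linked report: the queue becomes a PriorityBlockingQueue, and the add/emit paths are synchronized so queue mutation and mostUpstreamWritableLoc updates cannot interleave. A stripped-down model of that structure:

    import java.util.concurrent.PriorityBlockingQueue;

    public class SortedEmitterDemo {
        private final PriorityBlockingQueue<Integer> queue = new PriorityBlockingQueue<>(50);

        // synchronized: concurrent add() calls cannot interleave with emission
        public synchronized void add(int start) {
            queue.add(start);
        }

        public synchronized void emitAll() {
            Integer head;
            while ((head = queue.poll()) != null)
                System.out.println("emit record at position " + head);
        }

        public static void main(String[] args) throws InterruptedException {
            SortedEmitterDemo writer = new SortedEmitterDemo();
            Thread t1 = new Thread(() -> { writer.add(300); writer.add(100); });
            Thread t2 = new Thread(() -> writer.add(200));
            t1.start(); t2.start();
            t1.join(); t2.join();
            writer.emitAll(); // 100, 200, 300 regardless of arrival order
        }
    }
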
@@ -186,17 +186,21 @@ public class ReadUtils {
      * @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read. NULL if the read is unmapped or the mate is mapped to another contig.
      */
     public static Integer getAdaptorBoundary(final SAMRecord read) {
+        final int MAXIMUM_ADAPTOR_LENGTH = 8;
         final int insertSize = Math.abs(read.getInferredInsertSize()); // the inferred insert size can be negative if the mate is mapped before the read (so we take the absolute value)
 
-        if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another
-            return null; // chromosome or unmapped pairs
-
-        int adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read)
+        if (insertSize == 0 || read.getReadUnmappedFlag()) // no adaptors in reads with mates in another chromosome or unmapped pairs
+            return null;
+
+        Integer adaptorBoundary; // the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read)
         if (read.getReadNegativeStrandFlag())
             adaptorBoundary = read.getMateAlignmentStart() - 1; // case 1 (see header)
         else
             adaptorBoundary = read.getAlignmentStart() + insertSize + 1; // case 2 (see header)
 
+        if ( (adaptorBoundary < read.getAlignmentStart() - MAXIMUM_ADAPTOR_LENGTH) || (adaptorBoundary > read.getAlignmentEnd() + MAXIMUM_ADAPTOR_LENGTH) )
+            adaptorBoundary = null; // we are being conservative by not allowing the adaptor boundary to go beyond what we belive is the maximum size of an adaptor
+
         return adaptorBoundary;
     }

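The switch from int to Integer is the point of the rewrite: the method can now answer "no believable adaptor boundary" with null whenever the computed position lands more than MAXIMUM_ADAPTOR_LENGTH outside the read. The arithmetic, freestanding (the parameters stand in for the SAMRecord accessors used above):

    public class AdaptorBoundaryDemo {
        static final int MAXIMUM_ADAPTOR_LENGTH = 8;

        static Integer adaptorBoundary(int alignmentStart, int alignmentEnd,
                                       int mateAlignmentStart, int inferredInsertSize,
                                       boolean negativeStrand) {
            final int insertSize = Math.abs(inferredInsertSize); // negative when the mate maps first
            if (insertSize == 0) return null;                    // unmapped mate or other contig
            Integer boundary = negativeStrand
                    ? mateAlignmentStart - 1                     // adaptor immediately precedes the read
                    : alignmentStart + insertSize + 1;           // adaptor immediately follows the fragment
            if (boundary < alignmentStart - MAXIMUM_ADAPTOR_LENGTH
                    || boundary > alignmentEnd + MAXIMUM_ADAPTOR_LENGTH)
                boundary = null;                                 // implausibly far from the read: distrust it
            return boundary;
        }

        public static void main(String[] args) {
            System.out.println(adaptorBoundary(100, 150, 95, -60, true));  // 94
            System.out.println(adaptorBoundary(100, 150, 500, 0, false));  // null
        }
    }
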
@@ -1,20 +1,19 @@
 package org.broadinstitute.sting.gatk.walkers;
 
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.utils.sam.ArtificialReadsTraversal;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.SAMFileHeader;
-
-import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertTrue;
-
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;
 
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
+
 
 /*
  * Copyright (c) 2009 The Broad Institute

@@ -44,11 +43,11 @@ import org.testng.annotations.Test;
 /**
  * @author aaron
  * <p/>
- * Class PrintReadsWalkerUnitTest
+ * Class PrintReadsUnitTest
 * <p/>
 * This tests the print reads walker, using the artificial reads traversal
 */
-public class PrintReadsWalkerUnitTest extends BaseTest {
+public class PrintReadsUnitTest extends BaseTest {
 
     /**
      * our private fake reads traversal. This traversal seeds the

@@ -60,19 +59,23 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
     private ReferenceContext bases = null;
     //private ReferenceContext ref = new ReferenceContext()
 
+    PrintReadsWalker walker;
+    ArtificialSAMFileWriter writer;
+
+    @BeforeMethod
     public void before() {
         trav = new ArtificialReadsTraversal();
         readTotal = ( ( trav.endingChr - trav.startingChr ) + 1 ) * trav.readsPerChr + trav.unMappedReads;
+
+        walker = new PrintReadsWalker();
+        writer = new ArtificialSAMFileWriter();
+        walker.out = writer;
+        walker.initialize();
     }
 
     /** test that we get out the same number of reads we put in */
     @Test
     public void testReadCount() {
-        PrintReadsWalker walker = new PrintReadsWalker();
-        ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
-        walker.out = writer;
-
         trav.traverse(walker, null, writer);
         assertEquals(writer.getRecords().size(), readTotal);
     }

@@ -80,10 +83,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
     /** test that we're ok with a null read */
     @Test
     public void testNullRead() {
-        PrintReadsWalker walker = new PrintReadsWalker();
-        ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
-        walker.out = writer;
-
         SAMRecord rec = walker.map(bases, null, null);
         assertTrue(rec == null);
     }

@@ -91,10 +90,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
     /** tes that we get the read we put into the map function */
     @Test
     public void testReturnRead() {
-        PrintReadsWalker walker = new PrintReadsWalker();
-        ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
-        walker.out = writer;
-
         SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
         GATKSAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
         SAMRecord ret = walker.map(bases, rec, null);

@@ -102,20 +97,6 @@ public class PrintReadsWalkerUnitTest extends BaseTest {
         assertTrue(ret.getReadName().equals(rec.getReadName()));
     }
 
-    /** test that the read makes it to the output source */
-    @Test
-    public void testReducingRead() {
-        PrintReadsWalker walker = new PrintReadsWalker();
-        ArtificialSAMFileWriter writer = new ArtificialSAMFileWriter();
-        walker.out = writer;
-
-        SAMFileHeader head = ArtificialSAMUtils.createArtificialSamHeader(3,1,1000);
-        SAMRecord rec = ArtificialSAMUtils.createArtificialRead(head, "FakeRead", 1, 1, 50);
-        SAMRecord ret = walker.map(bases, null,null);
-        walker.reduce(ret,writer);
-
-        assertTrue(writer.getRecords().size() == 1);
-    }
-
 
 }

@@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     public void testHasAnnotsAsking1() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
-                Arrays.asList("e70eb5f80c93e366dcbe3cf684c154e4"));
+                Arrays.asList("604328867fc9aaf3e71fa0f9ca2ba5c9"));
         executeTest("test file has annotations, asking for annotations, #1", spec);
     }

@@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     public void testNoAnnotsAsking1() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
-                Arrays.asList("1e52761fdff73a5361b5eb0a6e5d9dad"));
+                Arrays.asList("bbde8c92d27ad2a7ec1ff2d095d459eb"));
         executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
     }

@@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     public void testExcludeAnnotations() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
-                Arrays.asList("bb4eebfaffc230cb8a31e62e7b53a300"));
+                Arrays.asList("8ec9f79cab84f26d8250f00d99d18aac"));
         executeTest("test exclude annotations", spec);
     }

@@ -171,7 +171,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
 
     @Test
     public void testTDTAnnotation() {
-        final String MD5 = "204e67536a17af7eaa6bf0a910818997";
+        final String MD5 = "0aedd760e8099f0b95d53a41bdcd793e";
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-T VariantAnnotator -R " + b37KGReference + " -A TransmissionDisequilibriumTest --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
                 " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,

@@ -62,7 +62,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
    public void testCallableLociWalker3() {
        String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s";
        WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
                Arrays.asList("4496551d4493857e5153d8172965e527", "b0667e31af9aec02eaf73ca73ec16937"));
                Arrays.asList("b7d26a470ef906590249f2fa45fd6bdd", "da431d393f7c2b2b3e27556b86c1dbc7"));
        executeTest("formatBed lots of arguments", spec);
    }
}
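The argument string above configures CallableLoci's per-locus filters: depth bounds plus base- and mapping-quality floors. A hedged sketch of how such thresholds could combine (an assumption for illustration; the walker's real state machine is richer):

import java.util.List;
import net.sf.samtools.SAMRecord;

final class CallableLociSketch {
    // Illustrative reading of the thresholds above (assumed, not the walker's
    // actual implementation): count reads clearing the mapping-quality floor,
    // then require the depth to land inside [minDepth, maxDepth].
    static boolean isCallable(final List<SAMRecord> pileup,
                              final int minMappingQuality,
                              final int minDepth, final int maxDepth) {
        int depth = 0;
        for (final SAMRecord read : pileup)
            if (read.getMappingQuality() >= minMappingQuality)
                depth++;
        return depth >= minDepth && depth <= maxDepth;
    }
}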
@@ -55,25 +55,25 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
        spec.setOutputFileLocation(baseOutputFile);

        // now add the expected files that get generated
        spec.addAuxFile("2f072fd8b41b5ac1108797f89376c797", baseOutputFile);
        spec.addAuxFile("d17ac7cc0b58ba801d2b0727a363d615", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
        spec.addAuxFile("c05190c9e6239cdb1cd486edcbc23505", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
        spec.addAuxFile("0f9603eb1ca4a26828e82d8c8f4991f6", baseOutputFile);
        spec.addAuxFile("51e6c09a307654f43811af35238fb179", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
        spec.addAuxFile("229b9b5bc2141c86dbc69c8acc9eba6a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
        spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics"));
        spec.addAuxFile("c94a52b4e73a7995319e0b570c80d2f7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
        spec.addAuxFile("1970a44efb7ace4e51a37f0bd2dc84d1", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
        spec.addAuxFile("c321c542be25359d2e26d45cbeb6d7ab", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
        spec.addAuxFile("9023cc8939777d515cd2895919a99688", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
        spec.addAuxFile("3597b69e90742c5dd7c83fbc74d079f3", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
        spec.addAuxFile("e69ee59f447816c025c09a56e321cef8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
        spec.addAuxFile("fa054b665d1ae537ada719da7713e11b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
        spec.addAuxFile("28dec9383b3a323a5ce7d96d62712917", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
        spec.addAuxFile("a836b92ac17b8ff9788e2aaa9116b5d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
        spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
        spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
        spec.addAuxFile("1a6ea3aa759fb154ccc4e171ebca9d02", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
        spec.addAuxFile("b492644ff06b4ffb044d5075cd168abf", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
        spec.addAuxFile("77cef87dc4083a7b60b7a7b38b4c0bd8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
        spec.addAuxFile("8e1adbe37b98bb2271ba13932d5c947f", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
        spec.addAuxFile("761d2f9daf2ebaf43abf65c8fd2fcd05", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
        spec.addAuxFile("4656c8797696cf5ef0cdc5971271236a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_summary"));
        spec.addAuxFile("6f1d7f2120a4ac524c6026498f45295a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
        spec.addAuxFile("69c424bca013159942337b67fdf31ff8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
        spec.addAuxFile("6909d50a7da337cd294828b32b945eb8", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
        spec.addAuxFile("a395dafde101971d2b9e5ddb6cd4b7d0", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
        spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
        spec.addAuxFile("0582b4681dbc02ece2dfe2752dcfd228", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
        spec.addAuxFile("0685214965bf1863f7ce8de2e38af060", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
        spec.addAuxFile("7a0cd8a5ebaaa82621fd3b5aed9c32fe", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
        spec.addAuxFile("185b910e499c08a8b88dd3ed1ac9e8ec", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
        spec.addAuxFile("d5d11b686689467b5a8836f0a07f447d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
        spec.addAuxFile("ad1a2775a31b1634daf64e691676bb96", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));

        execute("testBaseOutputNoFiltering",spec);
    }
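Each addAuxFile call above pairs a derived coverage file with its pinned digest, using createTempFileFromBase to name the expected outputs. A plausible shape for that helper (an assumption; the real WalkerTest utility may differ):

import java.io.File;

final class TempFileSketch {
    // Hypothetical shape of createTempFileFromBase (assumed, not the actual
    // WalkerTest helper): wrap an expected output path in a File and register
    // it for cleanup so per-test artifacts don't accumulate.
    static File createTempFileFromBase(final String basePath) {
        final File f = new File(basePath);
        f.deleteOnExit();
        return f;
    }
}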
@@ -189,7 +189,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                " -o %s" +
                " -L 1:10,000,000-10,100,000",
                1,
                Arrays.asList("2b2729414ae855d390e7940956745bce"));
                Arrays.asList("f0fbe472f155baf594b1eeb58166edef"));

        executeTest(String.format("test multiple technologies"), spec);
    }
@@ -208,7 +208,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                " -L 1:10,000,000-10,100,000" +
                " -baq CALCULATE_AS_NECESSARY",
                1,
                Arrays.asList("95c6120efb92e5a325a5cec7d77c2dab"));
                Arrays.asList("8c87c749a7bb5a76ed8504d4ec254272"));

        executeTest(String.format("test calling with BAQ"), spec);
    }
@@ -34,8 +34,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
        new CCTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "ab4940a16ab990181bd8368c76b23853" );
        new CCTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "17d4b8001c982a70185e344929cf3941");
        new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "36c0c467b6245c2c6c4e9c956443a154" );
        new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "955a8fa2ddb2b04c406766ccd9ac45cc" );
        new CCTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "714e65d6cb51ae32221a77ce84cbbcdc" );
        new CCTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "932f0063abb2a23c22ec992ef8d36aa5" );
        return CCTest.getTests(CCTest.class);
    }
@@ -91,8 +91,8 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
    public Object[][] createTRTestData() {
        new TRTest( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "0b7123ae9f4155484b68e4a4f96c5504" );
        new TRTest( validationDataLocation + "NA19240.chr1.BFAST.SOLID.bam", "d04cf1f6df486e45226ebfbf93a188a5");
        new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "b2f4757bc47cf23bd9a09f756c250787" );
        new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "502c7df4d4923c4d078b014bf78bed34" );
        new TRTest( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "74314e5562c1a65547bb0edaacffe602" );
        new TRTest( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "41c2f82f7789421f3690ed3c35b8f2e4" );

        return TRTest.getTests(TRTest.class);
    }
@@ -291,7 +291,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
    @Test
    public void testCountCovariatesNoIndex() {
        HashMap<String, String> e = new HashMap<String, String>();
        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "828d247c6e8ef5ebdf3603dc0ce79f61" );
        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "aac7df368ca589dc0a66d5bd9ad007e3" );

        for ( Map.Entry<String, String> entry : e.entrySet() ) {
            String bam = entry.getKey();
@@ -317,7 +317,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest {
    @Test(dependsOnMethods = "testCountCovariatesNoIndex")
    public void testTableRecalibratorNoIndex() {
        HashMap<String, String> e = new HashMap<String, String>();
        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "991f093a0e610df235d28ada418ebf33" );
        e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.noindex.bam", "02249d9933481052df75c58a2a1a8e63" );

        for ( Map.Entry<String, String> entry : e.entrySet() ) {
            String bam = entry.getKey();
@@ -0,0 +1,91 @@
/*
 * Copyright (c) 2012, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

// our package
package org.broadinstitute.sting.utils.codecs.vcf;


// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.*;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.*;


public class VCFCodecUnitTest extends BaseTest {

    // --------------------------------------------------------------------------------
    //
    // Provider
    //
    // --------------------------------------------------------------------------------

    private class AlleleClippingTestProvider extends TestDataProvider {
        final String ref;
        final List<Allele> alleles = new ArrayList<Allele>();
        final int expectedClip;

        private AlleleClippingTestProvider(final int expectedClip, final String ref, final String ... alleles) {
            super(AlleleClippingTestProvider.class);
            this.ref = ref;
            for ( final String allele : alleles )
                this.alleles.add(Allele.create(allele));
            this.expectedClip = expectedClip;
        }

        @Override
        public String toString() {
            return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip);
        }
    }

    @DataProvider(name = "AlleleClippingTestProvider")
    public Object[][] MakeAlleleClippingTest() {
        // pair clipping
        new AlleleClippingTestProvider(0, "ATT", "CCG");
        new AlleleClippingTestProvider(1, "ATT", "CCT");
        new AlleleClippingTestProvider(2, "ATT", "CTT");
        new AlleleClippingTestProvider(2, "ATT", "ATT"); // cannot completely clip allele

        // triplets
        new AlleleClippingTestProvider(0, "ATT", "CTT", "CGG");
        new AlleleClippingTestProvider(1, "ATT", "CTT", "CGT"); // the T can go
        new AlleleClippingTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go

        return AlleleClippingTestProvider.getTests(AlleleClippingTestProvider.class);
    }


    @Test(dataProvider = "AlleleClippingTestProvider")
    public void TestAlleleClipping(AlleleClippingTestProvider cfg) {
        int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref, 0, 1);
        Assert.assertEquals(result, cfg.expectedClip);
    }
}
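The expected values in the provider encode the reverse-clipping rule: trailing bases common to the reference and every allele are clipped, but no allele may be clipped away entirely. A compact re-derivation of that rule from the test data (a sketch, not the actual AbstractVCFCodec.computeReverseClipping source):

import java.util.Arrays;
import java.util.List;

final class ReverseClipSketch {
    // Clip trailing bases shared by the ref and all alleles, keeping at least
    // one base in each allele and in the ref.
    static int reverseClip(final List<String> alleles, final String ref) {
        int clip = 0;
        while (true) {
            for (final String allele : alleles) {
                final int pos = allele.length() - clip - 1;
                if (pos < 1 || clip >= ref.length() - 1
                        || allele.charAt(pos) != ref.charAt(ref.length() - clip - 1))
                    return clip; // mismatch, or an allele would vanish
            }
            clip++;
        }
    }

    public static void main(String[] args) {
        System.out.println(reverseClip(Arrays.asList("CCT"), "ATT"));        // 1
        System.out.println(reverseClip(Arrays.asList("CTT", "CGT"), "ATT")); // 1
    }
}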
@@ -83,6 +83,28 @@ public class IntervalIntegrationTest extends WalkerTest {
        executeTest("testUnmappedReadInclusion",spec);
    }

    @Test
    public void testMixedMappedAndUnmapped() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                "-T PrintReads" +
                " -I " + validationDataLocation + "MV1994.bam" +
                " -R " + validationDataLocation + "Escherichia_coli_K12_MG1655.fasta" +
                " -L Escherichia_coli_K12:4630000-4639675" +
                " -L unmapped" +
                " -U",
                0, // two output files
                Collections.<String>emptyList());

        // our base file
        File baseOutputFile = createTempFile("testUnmappedReadInclusion",".bam");
        spec.setOutputFileLocation(baseOutputFile);
        spec.addAuxFile("083ef1e9ded868e0d12c05a1354c0319",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
        spec.addAuxFile("fa90ff91ac0cc689c71a3460a3530b8b", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));

        executeTest("testUnmappedReadInclusion",spec);
    }


    @Test(enabled = false)
    public void testUnmappedReadExclusion() {
        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@@ -103,10 +103,31 @@ public class ReadUtilsUnitTest extends BaseTest {
        read.setReadNegativeStrandFlag(false);
        boundary = ReadUtils.getAdaptorBoundary(read);
        Assert.assertNull(boundary);
        read.setInferredInsertSize(10);

        // Test case 6: read is unmapped
        read.setReadUnmappedFlag(true);
        boundary = ReadUtils.getAdaptorBoundary(read);
        Assert.assertNull(boundary);
        read.setReadUnmappedFlag(false);

        // Test case 7: reads don't overlap and look like this:
        //    <--------|
        //                  |------>
        // first read:
        myStart = 980;
        read.setAlignmentStart(myStart);
        read.setInferredInsertSize(20);
        read.setReadNegativeStrandFlag(true);
        boundary = ReadUtils.getAdaptorBoundary(read);
        Assert.assertNull(boundary);

        // second read:
        myStart = 1000;
        read.setAlignmentStart(myStart);
        read.setMateAlignmentStart(980);
        read.setReadNegativeStrandFlag(false);
        boundary = ReadUtils.getAdaptorBoundary(read);
        Assert.assertNull(boundary);
    }
}
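Cases 6 and 7 pin down when no adaptor boundary exists: an unmapped read, and an outward-facing pair whose reads cannot run into the adaptor. A partial sketch of those null conditions as the assertions imply them (an assumption, not the actual ReadUtils.getAdaptorBoundary logic):

import net.sf.samtools.SAMRecord;

final class AdaptorBoundarySketch {
    // True when the pair gives no usable adaptor boundary, per the assertions
    // above (assumed semantics): unmapped read, unknown fragment size, or an
    // outward-facing pair.
    static boolean hasNoAdaptorBoundary(final SAMRecord read) {
        if (read.getReadUnmappedFlag()) return true;        // test case 6
        if (read.getInferredInsertSize() == 0) return true; // no fragment info
        final boolean negative = read.getReadNegativeStrandFlag();
        if (negative && read.getAlignmentStart() < read.getMateAlignmentStart()) return true;  // case 7, first read
        if (!negative && read.getAlignmentStart() > read.getMateAlignmentStart()) return true; // case 7, second read
        return false;
    }
}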
@@ -134,8 +134,8 @@ class GATKResourcesBundle extends QScript {
    addResource(new Resource("/humgen/1kg/processing/official_release/phase1/ALL.wgs.VQSR_consensus_biallelic.20101123.indels.sites.vcf",
      "1000G_biallelic.indels", b37, true, false))

    addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf",
      "Mills_Devine_2hit.indels", b37, true, true))
    addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf",
      "Mills_and_1000G_gold_standard.indels", b37, true, true))

    //
    // example call set for wiki tutorial
@@ -47,12 +47,16 @@ class SnpEff extends JavaCommandLineFunction {
  @Argument(doc="verbose", required=false)
  var verbose = true

  @Argument(doc="onlyCoding", required=false)
  var onlyCoding = true

  @Output(doc="snp eff output")
  var outVcf: File = _

  override def commandLine = super.commandLine +
    required("eff") +
    conditional(verbose, "-v") +
    required("-onlyCoding", onlyCoding.toString) +
    optional("-c", config) +
    required("-i", "vcf") +
    required("-o", "vcf") +
Binary file not shown.
@@ -1,3 +1,3 @@
<ivy-module version="1.0">
    <info organisation="org.broad" module="tribble" revision="46" status="integration" />
    <info organisation="org.broad" module="tribble" revision="53" status="integration" />
</ivy-module>