Merge branch 'master' of ssh://gsa4.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

2012-08-19 09:23:21 -04:00 · 2012-08-19 09:23:21 -04:00 · d9641e3d57
parent b61ecc7c19 342a5b68ed
commit d9641e3d57
21 changed files with 338 additions and 112 deletions
--- a/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R
+++ b/public/R/src/org/broadinstitute/sting/utils/R/gsalib/R/gsa.variantqc.utils.R
@ -207,7 +207,7 @@ plotVariantQC <- function(metrics, measures, requestedStrat = "Sample",
  if ( requestedStrat == "Sample" ) {
    perSampleGraph <- perSampleGraph + geom_text(aes(label=strat), size=1.5) + geom_blank() # don't display a scale
-    perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)", formatter=function(x) "")
+    perSampleGraph <- perSampleGraph + scale_x_discrete("Sample (ordered by nSNPs)")
  } else { # by AlleleCount
    perSampleGraph <- perSampleGraph + geom_point(aes(size=log10(nobs))) #+ geom_smooth(aes(weight=log10(nobs)))
    perSampleGraph <- perSampleGraph + scale_x_log10("AlleleCount")
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentDefinition.java
@ -26,6 +26,7 @@
 package org.broadinstitute.sting.commandline;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import java.lang.annotation.Annotation;
 import java.util.List;
@ -147,6 +148,9 @@ public class ArgumentDefinition {
        this.exclusiveOf = exclusiveOf;
        this.validation = validation;
        this.validOptions = validOptions;
        validateName(shortName);
        validateName(fullName);
    }
    /**
@ -192,6 +196,9 @@ public class ArgumentDefinition {
        else
            shortName = null;
        validateName(shortName);
        validateName(fullName);
        this.ioType = ioType;
        this.argumentType = argumentType;
        this.fullName = fullName;
@ -277,4 +284,14 @@ public class ArgumentDefinition {
        String validation = (String)CommandLineUtils.getValue(annotation, "validation");
        return validation.trim().length() > 0 ? validation.trim() : null;
    }
    /**
     * Make sure the argument's name is valid
     *
     * @param name
     */
    private void validateName(final String name) {
        if ( name != null && name.startsWith("-") )
            throw new ReviewedStingException("Invalid argument definition: " + name + " begins with a -");
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reference/ReferenceDataSource.java
@ -62,23 +62,24 @@ public class ReferenceDataSource {
     * @param fastaFile Fasta file to be used as reference
     */
    public ReferenceDataSource(File fastaFile) {
        // does the fasta file exist? check that first...
        if (!fastaFile.exists())
            throw new UserException("The fasta file you specified (" + fastaFile.getAbsolutePath() + ") does not exist.");
-        File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
+        final boolean isGzipped = fastaFile.getAbsolutePath().endsWith(".gz");
-        File dictFile;
+
-        if (fastaFile.getAbsolutePath().endsWith("fa")) {
+        final File indexFile = new File(fastaFile.getAbsolutePath() + ".fai");
-            dictFile = new File(fastaFile.getAbsolutePath().replace(".fa", ".dict"));
+
-        }
+        // determine the name for the dict file
-        else
+        final String fastaExt = (fastaFile.getAbsolutePath().endsWith("fa") ? ".fa" : ".fasta" ) + (isGzipped ? ".gz" : "");
-         dictFile = new File(fastaFile.getAbsolutePath().replace(".fasta", ".dict"));
+        final File dictFile = new File(fastaFile.getAbsolutePath().replace(fastaExt, ".dict"));
        /*
-         if index file does not exist, create it manually
+        * if index file does not exist, create it manually
-          */
+        */
        if (!indexFile.exists()) {
            if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
            logger.info(String.format("Index file %s does not exist. Trying to create it now.", indexFile.getAbsolutePath()));
            FSLockWithShared indexLock = new FSLockWithShared(indexFile,true);
            try {
@ -114,6 +115,8 @@ public class ReferenceDataSource {
        * This has been filed in trac as (PIC-370) Want programmatic interface to CreateSequenceDictionary
        */
        if (!dictFile.exists()) {
            if ( isGzipped ) throw new UserException.CouldNotCreateReferenceFAIorDictForGzippedRef(fastaFile);
            logger.info(String.format("Dict file %s does not exist. Trying to create it now.", dictFile.getAbsolutePath()));
            /*
@ -218,9 +221,9 @@ public class ReferenceDataSource {
            for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
                final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
                shards.add(new LocusShard(parser,
-                                          readsDataSource,
+                        readsDataSource,
-                                          Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
+                        Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
-                                          null));
+                        null));
            }
        }
        return shards;
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/BadCigarFilter.java
@ -29,9 +29,19 @@ import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
 import java.util.Iterator;
 /**
 * Filter out reads with wonky cigar strings.
 *
 *  - No reads with Hard/Soft clips in the middle of the cigar
 *  - No reads starting with deletions (with or without preceding clips)
 *  - No reads ending in deletions (with or without follow-up clips)
 *  - No reads that are fully hard or soft clipped
 *  - No reads that have consecutive indels in the cigar (II, DD, ID or DI)
 *
 *  ps: apparently an empty cigar is okay...
 *
 * @author ebanks
 * @version 0.1
 */
@ -40,28 +50,72 @@ public class BadCigarFilter extends ReadFilter {
    public boolean filterOut(final SAMRecord rec) {
        final Cigar c = rec.getCigar();
        if( c.isEmpty() ) { return false; }                                                                             // if there is no Cigar then it can't be bad
-        boolean previousElementWasIndel = false;
+        // if there is no Cigar then it can't be bad
-        CigarOperator lastOp = c.getCigarElement(0).getOperator();
+        if( c.isEmpty() ) {
-
+            return false;
        if (lastOp == CigarOperator.D)                                                                                  // filter out reads starting with deletion
            return true;
        for (CigarElement ce : c.getCigarElements()) {
            CigarOperator op = ce.getOperator();
            if (op == CigarOperator.D || op == CigarOperator.I) {
                if (previousElementWasIndel)
                    return true;                                                                                        // filter out reads with adjacent I/D
                previousElementWasIndel = true;
            }
            else                                                                                                        // this is a regular base (match/mismatch/hard or soft clip)
                previousElementWasIndel = false;                                                                        // reset the previous element
            lastOp = op;
        }
-        return lastOp == CigarOperator.D;
+        Iterator<CigarElement> elementIterator = c.getCigarElements().iterator();
        CigarOperator firstOp = CigarOperator.H;
        while (elementIterator.hasNext() && (firstOp == CigarOperator.H || firstOp == CigarOperator.S)) {
            CigarOperator op = elementIterator.next().getOperator();
            // No reads with Hard/Soft clips in the middle of the cigar
            if (firstOp != CigarOperator.H && op == CigarOperator.H) {
                    return true;
            }
            firstOp = op;
        }
        // No reads starting with deletions (with or without preceding clips)
        if (firstOp == CigarOperator.D) {
            return true;
        }
        boolean hasMeaningfulElements = (firstOp != CigarOperator.H && firstOp != CigarOperator.S);
        boolean previousElementWasIndel = firstOp == CigarOperator.I;
        CigarOperator lastOp = firstOp;
        CigarOperator previousOp = firstOp;
        while (elementIterator.hasNext()) {
            CigarOperator op = elementIterator.next().getOperator();
            if (op != CigarOperator.S && op != CigarOperator.H) {
                // No reads with Hard/Soft clips in the middle of the cigar
                if (previousOp == CigarOperator.S || previousOp == CigarOperator.H)
                    return true;
                lastOp = op;
                if (!hasMeaningfulElements && op.consumesReadBases()) {
                    hasMeaningfulElements = true;
                }
                if (op == CigarOperator.I || op == CigarOperator.D) {
                    // No reads that have consecutive indels in the cigar (II, DD, ID or DI)
                    if (previousElementWasIndel) {
                        return true;
                    }
                    previousElementWasIndel = true;
                }
                else {
                    previousElementWasIndel = false;
                }
            }
            // No reads with Hard/Soft clips in the middle of the cigar
            else if (op == CigarOperator.S && previousOp == CigarOperator.H) {
                return true;
            }
            previousOp = op;
        }
        // No reads ending in deletions (with or without follow-up clips)
        // No reads that are fully hard or soft clipped
        return lastOp == CigarOperator.D || !hasMeaningfulElements;
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
@ -159,7 +159,7 @@ public class LocusIteratorByState extends LocusIterator {
                    return stepForwardOnGenome();
                } else {
                    if (curElement != null && curElement.getOperator() == CigarOperator.D)
-                        throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads ending in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
+                        throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
                    // Reads that contain indels model the genomeOffset as the following base in the reference.  Because
                    // we fall into this else block only when indels end the read, increment genomeOffset  such that the
@ -185,7 +185,7 @@ public class LocusIteratorByState extends LocusIterator {
                    break;
                case D: // deletion w.r.t. the reference
                    if (readOffset < 0)             // we don't want reads starting with deletion, this is a malformed cigar string
-                        throw new UserException.MalformedBAM(read, "Read starting with deletion. Cigar: " + read.getCigarString() + ". This is an indication of a malformed file, but the SAM spec allows reads starting in deletion. If you are sure you want to use this read, re-run your analysis with the extra option: -rf BadCigar");
+                        throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
                    // should be the same as N case
                    genomeOffset++;
                    done = true;
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
@ -75,8 +75,6 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
        if( !dataProvider.hasReads() )
            throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
        boolean needsReferenceBasesP = WalkerManager.isRequired(walker, DataSource.REFERENCE_BASES);
        ReadView reads = new ReadView(dataProvider);
        ReadReferenceView reference = new ReadReferenceView(dataProvider);
@ -91,7 +89,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
            ReferenceContext refContext = null;
            // get the array of characters for the reference sequence, since we're a mapped read
-            if (needsReferenceBasesP && !read.getReadUnmappedFlag() && dataProvider.hasReference())
+            if (!read.getReadUnmappedFlag() && dataProvider.hasReference())
                refContext = reference.getReferenceContext(read);
            // update the number of reads we've seen
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@ -28,7 +28,7 @@ import java.util.List;
 */
@By(DataSource.READS)
-@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
+@Requires({DataSource.READS, DataSource.REFERENCE})
@PartitionBy(PartitionType.READ)
@ActiveRegionExtension(extension=50,maxRegion=1500)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class})
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/DataSource.java
@ -16,8 +16,18 @@ package org.broadinstitute.sting.gatk.walkers;
 * Allow user to choose between a number of different data sources.
 */
 public enum DataSource {
    /**
     * Does this walker require read (BAM) data to work?
     */
    READS,
    /**
     * Does this walker require reference data to work?
     */
    REFERENCE,
-    REFERENCE_BASES,         // Do I actually need the reference bases passed to the walker?
+
    /**
     * Does this walker require reference order data (VCF) to work?
     */
    REFERENCE_ORDERED_DATA
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java
@ -16,7 +16,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 * To change this template use File | Settings | File Templates.
 */
@By(DataSource.READS)
-@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
+@Requires({DataSource.READS,DataSource.REFERENCE})
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
@RemoveProgramRecords
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java
@ -12,7 +12,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 * Time: 2:52:28 PM
 * To change this template use File | Settings | File Templates.
 */
-@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
+@Requires({DataSource.READS, DataSource.REFERENCE})
@PartitionBy(PartitionType.READ)
 public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
    public boolean requiresOrderedReads() { return false; }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/RefWalker.java
@ -8,7 +8,7 @@ package org.broadinstitute.sting.gatk.walkers;
 * To change this template use File | Settings | File Templates.
 */
@By(DataSource.REFERENCE)
-@Requires({DataSource.REFERENCE, DataSource.REFERENCE_BASES})
+@Requires({DataSource.REFERENCE})
@Allows(DataSource.REFERENCE)
 public abstract class RefWalker<MapType, ReduceType> extends LocusWalker<MapType, ReduceType> {
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
@ -107,7 +107,7 @@ import java.util.ArrayList;
@BAQMode(ApplicationTime = BAQ.ApplicationTime.FORBIDDEN)
@By(DataSource.READS)
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class})                                   // only look at covered loci, not every loci of the reference file
-@Requires({DataSource.READS, DataSource.REFERENCE, DataSource.REFERENCE_BASES})                                         // filter out all reads with zero or unavailable mapping quality
+@Requires({DataSource.READS, DataSource.REFERENCE})                                         // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS)                                                                                       // this walker requires both -I input.bam and -R reference.fasta
 public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
    @ArgumentCollection
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -474,10 +474,17 @@ public class UnifiedGenotyperEngine {
        // add the MLE AC and AF annotations
        if ( alleleCountsofMLE.size() > 0 ) {
            attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
-            final double AN = (double)builder.make().getCalledChrCount();
+            final int AN = builder.make().getCalledChrCount();
            // let's sanity check that we don't have an invalid MLE value in there
            for ( int MLEAC : alleleCountsofMLE ) {
                if ( MLEAC > AN )
                    throw new ReviewedStingException(String.format("MLEAC value (%d) is larger than AN (%d) at position %s:%d", MLEAC, AN, loc.getContig(), loc.getStart()));
            }
            final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
            for ( int AC : alleleCountsofMLE )
-                MLEfrequencies.add((double)AC / AN);
+                MLEfrequencies.add((double)AC / (double)AN);
            attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
@ -645,7 +645,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
                                bestChildGenotype.clear();
                                bestChildGenotype.add(childGenotype.getKey());
                            }
-                            else if(MathUtils.compareDoubles(configurationLikelihood, bestConfigurationLikelihood) == 0) {
+                            else if(configurationLikelihood == bestConfigurationLikelihood) {
                                bestFirstParentGenotype.add(firstParentGenotype.getKey());
                                bestSecondParentGenotype.add(secondParentGenotype.getKey());
                                bestChildGenotype.add(childGenotype.getKey());
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java
@ -118,6 +118,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
            final int sitesBlockSize = decoder.readBlockSize(inputStream);
            final int genotypeBlockSize = decoder.readBlockSize(inputStream);
            decoder.readNextBlock(sitesBlockSize, inputStream);
            decodeSiteLoc(builder);
            final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java
@ -82,7 +82,7 @@ public final class BCF2Decoder {
    public void skipNextBlock(final int blockSizeInBytes, final InputStream stream) {
        try {
            final int bytesRead = (int)stream.skip(blockSizeInBytes);
-            validateReadBytes(bytesRead, blockSizeInBytes);
+            validateReadBytes(bytesRead, 1, blockSizeInBytes);
        } catch ( IOException e ) {
            throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e);
        }
@ -316,17 +316,37 @@ public final class BCF2Decoder {
    }
    /**
     * Read all bytes for a BCF record block into a byte[], and return it
     *
-     * @param inputStream
+     * Is smart about reading from the stream multiple times to fill the buffer, if necessary
-     * @return
+     *
     * @param blockSizeInBytes number of bytes to read
     * @param inputStream the stream to read from
     * @return a non-null byte[] containing exactly blockSizeInBytes bytes from the inputStream
     */
-    private final static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
+    @Requires({"blockSizeInBytes >= 0", "inputStream != null"})
    @Ensures("result != null")
    private static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
        assert blockSizeInBytes >= 0;
        final byte[] record = new byte[blockSizeInBytes];
        try {
-            final int bytesRead = inputStream.read(record);
+            int bytesRead = 0;
-            validateReadBytes(bytesRead, blockSizeInBytes);
+            int nReadAttempts = 0; // keep track of how many times we've read
            // because we might not read enough bytes from the file in a single go, do it in a loop until we get EOF
            while ( bytesRead < blockSizeInBytes ) {
                final int read1 = inputStream.read(record, bytesRead, blockSizeInBytes - bytesRead);
                if ( read1 == -1 )
                    validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
                else
                    bytesRead += read1;
            }
            if ( nReadAttempts > 1 ) // TODO -- remove me
                logger.warn("Required multiple read attempts to actually get the entire BCF2 block, unexpected behavior");
            validateReadBytes(bytesRead, nReadAttempts, blockSizeInBytes);
        } catch ( IOException e ) {
            throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e);
        }
@ -334,14 +354,20 @@ public final class BCF2Decoder {
        return record;
    }
-    private final static void validateReadBytes(final int actuallyRead, final int expected) {
+    /**
     * Make sure we read the right number of bytes, or throw an error
     *
     * @param actuallyRead
     * @param nReadAttempts
     * @param expected
     */
    private static void validateReadBytes(final int actuallyRead, final int nReadAttempts, final int expected) {
        assert expected >= 0;
        if ( actuallyRead < expected ) {
-            throw new UserException.MalformedBCF2(String.format("Failed to read next complete record: %s",
+            throw new UserException.MalformedBCF2(
-                    actuallyRead == -1 ?
+                    String.format("Failed to read next complete record: expected %d bytes but read only %d after %d iterations",
-                            "premature end of input stream" :
+                            expected, actuallyRead, nReadAttempts));
                            String.format("expected %d bytes but read only %d", expected, actuallyRead)));
        }
    }
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@ -340,6 +340,17 @@ public class UserException extends ReviewedStingException {
        }
    }
    public static class CouldNotCreateReferenceFAIorDictForGzippedRef extends UserException {
        public CouldNotCreateReferenceFAIorDictForGzippedRef(final File f) {
            super("Although the GATK can process .gz reference sequences, it currently cannot create FAI " +
                    "or DICT files for them.  In order to use the GATK with reference.fasta.gz you will need to " +
                    "create .dict and .fai files for reference.fasta.gz and name them reference.fasta.gz.fai and " +
                    "reference.dict.  Potentially the easiest way to do this is to uncompress reference.fasta, " +
                    "run the GATK to create the .dict and .fai files, and copy them to the appropriate location. " +
                    "Sorry for the inconvenience.");
        }
    }
    public static class CouldNotCreateReferenceIndexFileBecauseOfLock extends UserException.CouldNotCreateReferenceIndexFile {
        public CouldNotCreateReferenceIndexFileBecauseOfLock(File f) {
            super(f, "could not be written because an exclusive file lock could not be obtained. " +
--- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
+++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
@ -41,6 +41,8 @@ import java.util.Arrays;
 * Thread-safe!  Uses a lock object to protect write and access to the cache.
 */
 public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
    protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(CachingIndexedFastaSequenceFile.class);
    /** global enable flag */
    private static final boolean USE_CACHE = true;
@ -125,7 +127,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
    public void printEfficiency() {
        // comment out to disable tracking
        if ( (cacheHits + cacheMisses) % PRINT_FREQUENCY == 0 ) {
-            System.out.printf("### CachingIndexedFastaReader: hits=%d misses=%d efficiency %.6f%%%n", cacheHits, cacheMisses, calcEfficiency());
+            logger.info(String.format("### CachingIndexedFastaReader: hits=%d misses=%d efficiency %.6f%%%n", cacheHits, cacheMisses, calcEfficiency()));
        }
    }
--- a/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/filters/BadCigarFilterUnitTest.java
@ -1,11 +1,14 @@
 package org.broadinstitute.sting.gatk.filters;
-import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import net.sf.samtools.Cigar;
 import org.broadinstitute.sting.utils.clipping.ReadClipperTestUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 import java.util.List;
 /**
 * Checks that the Bad Cigar filter works for all kinds of wonky cigars
 *
@ -14,6 +17,29 @@ import org.testng.annotations.Test;
 */
 public class BadCigarFilterUnitTest {
    public static final String[] BAD_CIGAR_LIST = {
            "2D4M",               // starting with multiple deletions
            "4M2D",               // ending with multiple deletions
            "3M1I1D",             // adjacent indels AND ends in deletion
            "1M1I1D2M",           // adjacent indels I->D
            "1M1D2I1M",           // adjacent indels D->I
            "1M1I2M1D",           // ends in single deletion with insertion in the middle
            "4M1D",               // ends in single deletion
            "1D4M",               // starts with single deletion
            "2M1D1D2M",           // adjacent D's
            "1M1I1I1M",           // adjacent I's
            "1H1D4M",             // starting with deletion after H
            "1S1D3M",             // starting with deletion after S
            "1H1S1D3M",           // starting with deletion after HS
            "4M1D1H",             // ending with deletion before H
            "3M1D1S",             // ending with deletion before S
            "3M1D1S1H",           // ending with deletion before HS
            "10M2H10M",           // H in the middle
            "10M2S10M",           // S in the middle
            "1H1S10M2S10M1S1H",    // deceiving S in the middle
            "1H1S10M2H10M1S1H"    // deceiving H in the middle
    };
    BadCigarFilter filter;
    @BeforeClass
@ -21,40 +47,20 @@ public class BadCigarFilterUnitTest {
        filter = new BadCigarFilter();
    }
-    @Test
+    @Test(enabled = true)
    public void testWonkyCigars () {
-        byte[] bases = {'A', 'A', 'A', 'A'};
+        for (String cigarString : BAD_CIGAR_LIST) {
-        byte[] quals = {30, 30, 30, 30};
+            GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigarString);
-        GATKSAMRecord read;
+            Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-                                                                                                                        // starting with multiple deletions
+        }
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "2D4M");
+    }
        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "4M2D");                                           // ending with multiple deletions
+    @Test(enabled = true)
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
+    public void testGoodCigars() {
-
+        List<Cigar> cigarList = ReadClipperTestUtils.generateCigarList(10);
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "3M1I1D");                                         // adjacent indels AND ends in deletion
+        for (Cigar cigar : cigarList) {
-        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
+            GATKSAMRecord read = ReadClipperTestUtils.makeReadFromCigar(cigar);
-
+            Assert.assertFalse(filter.filterOut(read), read.getCigarString());
-        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I1D2M");                                       // adjacent indels I->D
+        }
        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1D2I1M");                                       // adjacent indels D->I
        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I2M1D");                                       // ends in single deletion with insertion in the middle
        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "4M1D");                                           // ends in single deletion
        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1D4M");                                           // starts with single deletion
        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "2M1D1D2M");                                       // adjacent D's
        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
        read = ArtificialSAMUtils.createArtificialRead(bases, quals, "1M1I1I1M");                                       // adjacent I's
        Assert.assertTrue(filter.filterOut(read), read.getCigarString());
    }
 }
--- a/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipping/ReadClipperTestUtils.java
@ -4,6 +4,7 @@ import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.testng.Assert;
@ -37,17 +38,22 @@ public class ReadClipperTestUtils {
        return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString());
    }
-    /**
+    public static GATKSAMRecord makeReadFromCigar(String cigarString) {
-     * This function generates every valid permutation of cigar strings with a given length.
+        return makeReadFromCigar(cigarFromString(cigarString));
-     *
+    }
-     * A valid cigar object obeys the following rules:
+
-     *  - No Hard/Soft clips in the middle of the read
+        /**
-     *  - No deletions in the beginning / end of the read
+        * This function generates every valid permutation of cigar strings with a given length.
-     *  - No repeated adjacent element (e.g. 1M2M -> this should be 3M)
+        *
-     *
+        * A valid cigar object obeys the following rules:
-     * @param maximumLength the maximum number of elements in the cigar
+        *  - No Hard/Soft clips in the middle of the read
-     * @return a list with all valid Cigar objects
+        *  - No deletions in the beginning / end of the read
-     */
+        *  - No repeated adjacent element (e.g. 1M2M -> this should be 3M)
        *  - No consecutive I/D elements
        *
        * @param maximumLength the maximum number of elements in the cigar
        * @return a list with all valid Cigar objects
        */
    public static List<Cigar> generateCigarList(int maximumLength) {
        int numCigarElements = cigarElements.length;
        LinkedList<Cigar> cigarList = new LinkedList<Cigar>();
@ -137,7 +143,10 @@ public class ReadClipperTestUtils {
        CigarElement lastElement = null;
        int lastElementLength = 0;
        for (CigarElement cigarElement : rawCigar.getCigarElements()) {
-            if (lastElement != null && lastElement.getOperator() == cigarElement.getOperator())
+            if (lastElement != null &&
                    ((lastElement.getOperator() == cigarElement.getOperator()) ||
                     (lastElement.getOperator() == CigarOperator.I && cigarElement.getOperator() == CigarOperator.D) ||
                     (lastElement.getOperator() == CigarOperator.D && cigarElement.getOperator() == CigarOperator.I)))
                lastElementLength += cigarElement.getLength();
            else
            {
@ -191,7 +200,7 @@ public class ReadClipperTestUtils {
    /**
     * Checks whether or not the read has any cigar element that is not H or S
     *
-     * @param read
+     * @param read the read
     * @return true if it has any M, I or D, false otherwise
     */
    public static boolean readHasNonClippedBases(GATKSAMRecord read) {
@ -201,5 +210,79 @@ public class ReadClipperTestUtils {
        return false;
    }
    public static Cigar cigarFromString(String cigarString) {
        Cigar cigar = new Cigar();
        boolean isNumber = false;
        int number = 0;
        for (int i = 0; i < cigarString.length(); i++) {
            char x = cigarString.charAt(i);
            if (x >= '0' && x <='9') {
                if (isNumber) {
                    number *= 10;
                }
                else {
                    isNumber = true;
                }
                number += x - '0';
            }
            else {
                CigarElement e;
                switch (x) {
                    case 'M':
                    case 'm':
                        e = new CigarElement(number, CigarOperator.M);
                    break;
                    case 'I':
                    case 'i':
                        e = new CigarElement(number, CigarOperator.I);
                    break;
                    case 'D':
                    case 'd':
                        e = new CigarElement(number, CigarOperator.D);
                    break;
                    case 'S':
                    case 's':
                        e = new CigarElement(number, CigarOperator.S);
                    break;
                    case 'N':
                    case 'n':
                        e = new CigarElement(number, CigarOperator.N);
                    break;
                    case 'H':
                    case 'h':
                        e = new CigarElement(number, CigarOperator.H);
                    break;
                    case 'P':
                    case 'p':
                        e = new CigarElement(number, CigarOperator.P);
                    break;
                    case '=':
                        e = new CigarElement(number, CigarOperator.EQ);
                    break;
                    case 'X':
                    case 'x':
                        e = new CigarElement(number, CigarOperator.X);
                    break;
                    default:
                        throw new ReviewedStingException("Unrecognized cigar operator: " + x + " (number: " + number + ")");
                }
                cigar.add(e);
            }
        }
        return cigar;
    }
 }
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala
@ -35,7 +35,7 @@ import org.broadinstitute.sting.queue.engine.{RunnerStatus, CommandLineJobRunner
 import java.util.regex.Pattern
 import java.lang.StringBuffer
 import java.util.Date
-import com.sun.jna.{Structure, StringArray, NativeLong}
+import com.sun.jna.{Pointer, Structure, StringArray, NativeLong}
 import com.sun.jna.ptr.IntByReference
 /**
@ -295,9 +295,17 @@ object Lsf706JobRunner extends Logging {
      // the platform LSF startTimes are in seconds, not milliseconds, so convert to the java convention
      runner.getRunInfo.startTime = new Date(jobInfo.startTime.longValue * 1000)
      runner.getRunInfo.doneTime = new Date(jobInfo.endTime.longValue * 1000)
-      val exHostsRaw = jobInfo.exHosts.getStringArray(0)
+
-      //logger.warn("exHostsRaw = " + exHostsRaw)
+      val exHostsList =
-      val exHostsList = exHostsRaw.toSeq
+        if (jobInfo.numExHosts != 1) {
          // this is necessary because
          val exHostsString = "multipleHosts_" + jobInfo.numExHosts
          logger.debug("numExHosts = " + jobInfo.numExHosts + " != 1 for job " + runner.jobId + ", cannot safely get exhosts, setting to " + exHostsString)
          List(exHostsString)
        } else {
          jobInfo.exHosts.getStringArray(0).toSeq
        }
      //logger.warn("exHostsList = " + exHostsList)
      val exHosts = exHostsList.reduceLeft(_ + "," + _)
      //logger.warn("exHosts = " + exHosts)