Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

2011-12-16 15:11:59 -05:00 · 2011-12-16 15:11:59 -05:00 · 1179588475
parent 0e0d022e58 5aa79dacfc
commit 1179588475
29 changed files with 1717 additions and 656 deletions
--- a/public/java/src/net/sf/picard/sam/MergingSamRecordIterator.java
+++ b/public/java/src/net/sf/picard/sam/MergingSamRecordIterator.java
@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+package net.sf.picard.sam;
+
+import net.sf.picard.PicardException;
+
+import java.util.*;
+import java.lang.reflect.Constructor;
+
+import net.sf.samtools.*;
+import net.sf.samtools.util.CloseableIterator;
+
+/**
+ * Provides an iterator interface for merging multiple underlying iterators into a single
+ * iterable stream. The underlying iterators/files must all have the same sort order unless
+ * the requested output format is unsorted, in which case any combination is valid.
+ */
+public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
+    private final PriorityQueue<ComparableSamRecordIterator> pq;
+    private final SamFileHeaderMerger samHeaderMerger;
+    private final Collection<SAMFileReader> readers;
+    private final SAMFileHeader.SortOrder sortOrder;
+    private final SAMRecordComparator comparator;
+
+    private boolean initialized = false;
+    private boolean iterationStarted = false;
+
+    /**
+     * Constructs a new merging iterator with the same set of readers and sort order as
+     * provided by the header merger parameter.
+     * @param headerMerger The merged header and contents of readers.
+     * @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order.
+     * @deprecated replaced by (SamFileHeaderMerger, Collection<SAMFileReader>, boolean)
+     */
+    public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) {
+        this(headerMerger, headerMerger.getReaders(), forcePresorted);
+    }
+
+    /**
+     * Constructs a new merging iterator with the same set of readers and sort order as
+     * provided by the header merger parameter.
+     * @param headerMerger The merged header and contents of readers.
+     * @param assumeSorted false ensures that the iterator checks the headers of the readers for appropriate sort order.
+     */
+    public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, Collection<SAMFileReader> readers, final boolean assumeSorted) {
+        this.samHeaderMerger = headerMerger;
+        this.sortOrder = headerMerger.getMergedHeader().getSortOrder();
+        this.comparator = getComparator();
+        this.readers = readers;
+
+        this.pq = new PriorityQueue<ComparableSamRecordIterator>(readers.size());
+
+        for (final SAMFileReader reader : readers) {
+            if (!assumeSorted && this.sortOrder != SAMFileHeader.SortOrder.unsorted &&
+                    reader.getFileHeader().getSortOrder() != this.sortOrder){
+                throw new PicardException("Files are not compatible with sort order");
+            }
+        }
+    }
+
+    /**
+     * Add a given SAM file iterator to the merging iterator.  Use this to restrict the merged iteration to a given genomic interval,
+     * rather than iterating over every read in the backing file or stream.
+     * @param reader Reader to add to the merging iterator.
+     * @param iterator Iterator traversing over reader contents.
+     */
+    public void addIterator(final SAMFileReader reader, final CloseableIterator<SAMRecord> iterator) {
+        if(iterationStarted)
+            throw new PicardException("Cannot add another iterator; iteration has already begun");
+        if(!samHeaderMerger.containsHeader(reader.getFileHeader()))
+            throw new PicardException("All iterators to be merged must be accounted for in the SAM header merger");
+        final ComparableSamRecordIterator comparableIterator = new ComparableSamRecordIterator(reader,iterator,comparator);
+        addIfNotEmpty(comparableIterator);
+        initialized = true;
+    }
+
+    private void startIterationIfRequired() {
+        if(initialized)
+            return;
+        for(SAMFileReader reader: readers)
+            addIterator(reader,reader.iterator());
+        iterationStarted = true;
+    }
+
+    /**
+     * Close down all open iterators.
+     */
+    public void close() {
+        // Iterators not in the priority queue have already been closed; only close down the iterators that are still in the priority queue.
+        for(CloseableIterator<SAMRecord> iterator: pq)
+            iterator.close();
+    }
+
+    /** Returns true if any of the underlying iterators has more records, otherwise false. */
+    public boolean hasNext() {
+        startIterationIfRequired();
+        return !this.pq.isEmpty();
+    }
+
+    /** Returns the next record from the top most iterator during merging. */
+    public SAMRecord next() {
+        startIterationIfRequired();
+        
+        final ComparableSamRecordIterator iterator = this.pq.poll();
+        final SAMRecord record = iterator.next();
+        addIfNotEmpty(iterator);
+        record.setHeader(this.samHeaderMerger.getMergedHeader());
+
+        // Fix the read group if needs be
+        if (this.samHeaderMerger.hasReadGroupCollisions()) {
+            final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.READ_GROUP_ID);
+            if (oldGroupId != null ) {
+                final String newGroupId = this.samHeaderMerger.getReadGroupId(iterator.getReader().getFileHeader(),oldGroupId);
+                record.setAttribute(ReservedTagConstants.READ_GROUP_ID, newGroupId); 
+                }
+        }
+
+        // Fix the program group if needs be
+        if (this.samHeaderMerger.hasProgramGroupCollisions()) { 
+            final String oldGroupId = (String) record.getAttribute(ReservedTagConstants.PROGRAM_GROUP_ID);
+            if (oldGroupId != null ) {
+                final String newGroupId = this.samHeaderMerger.getProgramGroupId(iterator.getReader().getFileHeader(),oldGroupId);
+                record.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, newGroupId); 
+                }
+        }
+
+        // Fix up the sequence indexes if needs be
+        if (this.samHeaderMerger.hasMergedSequenceDictionary()) {
+            if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+                record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getReferenceIndex()));
+            }
+
+            if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+                record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(),record.getMateReferenceIndex()));
+            }
+        }
+
+        return record;
+    }
+
+    /**
+     * Adds iterator to priority queue. If the iterator has more records it is added
+     * otherwise it is closed and not added.
+     */
+    private void addIfNotEmpty(final ComparableSamRecordIterator iterator) {
+        if (iterator.hasNext()) {
+            pq.offer(iterator);
+        }
+        else {
+            iterator.close();
+        }
+    }
+
+    /** Unsupported operation. */
+    public void remove() {
+        throw new UnsupportedOperationException("MergingSAMRecorderIterator.remove()");
+    }
+
+    /**
+     * Get the right comparator for a given sort order (coordinate, alphabetic). In the
+     * case of "unsorted" it will return a comparator that gives an arbitrary but reflexive
+     * ordering.
+     */
+    private SAMRecordComparator getComparator() {
+        // For unsorted build a fake comparator that compares based on object ID
+        if (this.sortOrder == SAMFileHeader.SortOrder.unsorted) {
+            return new SAMRecordComparator() {
+                public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
+                    return System.identityHashCode(lhs) - System.identityHashCode(rhs);
+                }
+
+                public int compare(final SAMRecord lhs, final SAMRecord rhs) {
+                    return fileOrderCompare(lhs, rhs);
+                }
+            };
+        }
+        if (samHeaderMerger.hasMergedSequenceDictionary() && sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) {
+            return new MergedSequenceDictionaryCoordinateOrderComparator();
+        }
+
+        // Otherwise try and figure out what kind of comparator to return and build it
+        return this.sortOrder.getComparatorInstance();
+    }
+
+    /** Returns the merged header that the merging iterator is working from. */
+    public SAMFileHeader getMergedHeader() {
+        return this.samHeaderMerger.getMergedHeader();
+    }
+
+    /**
+     * Ugh.  Basically does a regular coordinate compare, but looks up the sequence indices in the merged
+     * sequence dictionary.  I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids
+     * more copy & paste.
+     */
+    private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator {
+
+        public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
+            final int referenceIndex1 = getReferenceIndex(samRecord1);
+            final int referenceIndex2 = getReferenceIndex(samRecord2);
+            if (referenceIndex1 != referenceIndex2) {
+                if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+                    return 1;
+                } else if (referenceIndex2 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+                    return -1;
+                } else {
+                    return referenceIndex1 - referenceIndex2;
+                }
+            }
+            if (referenceIndex1 == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+                // Both are unmapped.
+                return 0;
+            }
+            return samRecord1.getAlignmentStart() - samRecord2.getAlignmentStart();
+        }
+
+        private int getReferenceIndex(final SAMRecord samRecord) {
+            if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+                return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getReferenceIndex());
+            }
+            if (samRecord.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+                return samHeaderMerger.getMergedSequenceIndex(samRecord.getHeader(), samRecord.getMateReferenceIndex());
+            }
+            return SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+        }
+    }
+}
--- a/public/java/src/net/sf/picard/sam/SamFileHeaderMerger.java
+++ b/public/java/src/net/sf/picard/sam/SamFileHeaderMerger.java
@ -0,0 +1,744 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.picard.sam;
+
+import java.util.*;
+
+import net.sf.picard.PicardException;
+import net.sf.samtools.AbstractSAMHeaderRecord;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMProgramRecord;
+import net.sf.samtools.SAMReadGroupRecord;
+import net.sf.samtools.SAMSequenceDictionary;
+import net.sf.samtools.SAMSequenceRecord;
+import net.sf.samtools.util.SequenceUtil;
+
+/**
+ * Merges SAMFileHeaders that have the same sequences into a single merged header
+ * object while providing read group translation for cases where read groups
+ * clash across input headers.
+ */
+public class SamFileHeaderMerger {
+    //Super Header to construct
+    private final SAMFileHeader mergedHeader;
+    private Collection<SAMFileReader> readers;
+    private final Collection<SAMFileHeader> headers;
+
+    //Translation of old group ids to new group ids
+    private final Map<SAMFileHeader, Map<String, String>> samReadGroupIdTranslation =
+            new IdentityHashMap<SAMFileHeader, Map<String, String>>();
+
+    //the read groups from different files use the same group ids
+    private boolean hasReadGroupCollisions = false;
+
+    //the program records from different files use the same program record ids
+    private boolean hasProgramGroupCollisions = false;
+
+    //Translation of old program group ids to new program group ids
+    private Map<SAMFileHeader, Map<String, String>> samProgramGroupIdTranslation =
+            new IdentityHashMap<SAMFileHeader, Map<String, String>>();
+
+    private boolean hasMergedSequenceDictionary = false;
+
+    // Translation of old sequence dictionary ids to new dictionary ids
+    // This is an IdentityHashMap because it can be quite expensive to compute the hashCode for
+    // large SAMFileHeaders.  It is possible that two input files will have identical headers so that
+    // the regular HashMap would fold them together, but the value stored in each of the two
+    // Map entries will be the same, so it should not hurt anything.
+    private final Map<SAMFileHeader,  Map<Integer, Integer>> samSeqDictionaryIdTranslationViaHeader =
+            new IdentityHashMap<SAMFileHeader, Map<Integer, Integer>>();
+
+    //HeaderRecordFactory that creates SAMReadGroupRecord instances.
+    private static final HeaderRecordFactory<SAMReadGroupRecord> READ_GROUP_RECORD_FACTORY = new HeaderRecordFactory<SAMReadGroupRecord>() {
+        public SAMReadGroupRecord createRecord(String id, SAMReadGroupRecord srcReadGroupRecord) {
+            return new SAMReadGroupRecord(id, srcReadGroupRecord);
+        }
+    };
+
+    //HeaderRecordFactory that creates SAMProgramRecord instances.
+    private static final HeaderRecordFactory<SAMProgramRecord> PROGRAM_RECORD_FACTORY = new HeaderRecordFactory<SAMProgramRecord>() {
+        public SAMProgramRecord createRecord(String id, SAMProgramRecord srcProgramRecord) {
+            return new SAMProgramRecord(id, srcProgramRecord);
+        }
+    };
+
+    //comparator used to sort lists of program group and read group records
+    private static final Comparator<AbstractSAMHeaderRecord> RECORD_ID_COMPARATOR = new Comparator<AbstractSAMHeaderRecord>() {
+        public int compare(AbstractSAMHeaderRecord o1, AbstractSAMHeaderRecord o2) {
+            return o1.getId().compareTo(o2.getId());
+        }
+    };
+
+    /**
+     * Create SAMFileHeader with additional information.  Required that sequence dictionaries agree.
+     *
+     * @param readers sam file readers to combine
+     * @param sortOrder sort order new header should have
+     * @deprecated replaced by SamFileHeaderMerger(Collection<SAMFileHeader>, SAMFileHeader.SortOrder, boolean)
+     */
+    public SamFileHeaderMerger(final Collection<SAMFileReader> readers, final SAMFileHeader.SortOrder sortOrder) {
+        this(readers, sortOrder, false);
+    }
+
+    /**
+     * Create SAMFileHeader with additional information.
+     *
+     * @param readers sam file readers to combine
+     * @param sortOrder sort order new header should have
+     * @param mergeDictionaries If true, merge sequence dictionaries in new header.  If false, require that
+     * all input sequence dictionaries be identical.
+     * @deprecated replaced by SamFileHeaderMerger(Collection<SAMFileHeader>, SAMFileHeader.SortOrder, boolean)
+     */
+    public SamFileHeaderMerger(final Collection<SAMFileReader> readers, final SAMFileHeader.SortOrder sortOrder, final boolean mergeDictionaries) {
+        this(sortOrder, getHeadersFromReaders(readers), mergeDictionaries);
+        this.readers = readers;
+    }
+
+    /**
+     * Create SAMFileHeader with additional information..  This is the preferred constructor.
+     *
+     * @param sortOrder sort order new header should have
+     * @param headers sam file headers to combine
+     * @param mergeDictionaries If true, merge sequence dictionaries in new header.  If false, require that
+     * all input sequence dictionaries be identical.
+     */
+    public SamFileHeaderMerger(final SAMFileHeader.SortOrder sortOrder, final Collection<SAMFileHeader> headers, final boolean mergeDictionaries) {
+        this.headers = headers;
+        this.mergedHeader = new SAMFileHeader();
+
+        SAMSequenceDictionary sequenceDictionary;
+        try {
+            sequenceDictionary = getSequenceDictionary(headers);
+            this.hasMergedSequenceDictionary = false;
+        }
+        catch (SequenceUtil.SequenceListsDifferException pe) {
+            if (mergeDictionaries) {
+                sequenceDictionary = mergeSequenceDictionaries(headers);
+                this.hasMergedSequenceDictionary = true;
+            }
+            else {
+                throw pe;
+            }
+        }
+
+        this.mergedHeader.setSequenceDictionary(sequenceDictionary);
+
+        // Set program that creates input alignments
+        for (final SAMProgramRecord program : mergeProgramGroups(headers)) {
+            this.mergedHeader.addProgramRecord(program);
+        }
+
+        // Set read groups for merged header
+        final List<SAMReadGroupRecord> readGroups = mergeReadGroups(headers);
+        this.mergedHeader.setReadGroups(readGroups);
+        this.mergedHeader.setGroupOrder(SAMFileHeader.GroupOrder.none);
+
+        this.mergedHeader.setSortOrder(sortOrder);
+
+        for (final SAMFileHeader header : headers) {
+            for (final String comment : header.getComments()) {
+                this.mergedHeader.addComment(comment);
+            }
+        }
+    }
+
+    // Utilility method to make use with old constructor
+    private static List<SAMFileHeader> getHeadersFromReaders(Collection<SAMFileReader> readers) {
+        List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
+        for (SAMFileReader reader : readers) {
+            headers.add(reader.getFileHeader());
+        }
+        return headers;
+    }
+
+
+    /**
+     * Checks to see if there are clashes where different readers are using the same read
+     * group IDs. If yes, then those IDs that collided are remapped.
+     *
+     * @param headers headers to combine
+     * @return new list of read groups constructed from all the readers
+     */
+    private List<SAMReadGroupRecord> mergeReadGroups(final Collection<SAMFileHeader> headers) {
+        //prepare args for mergeHeaderRecords(..) call
+        final HashSet<String> idsThatAreAlreadyTaken = new HashSet<String>();
+
+        final List<HeaderRecordAndFileHeader<SAMReadGroupRecord>> readGroupsToProcess = new LinkedList<HeaderRecordAndFileHeader<SAMReadGroupRecord>>();
+        for (final SAMFileHeader header : headers) {
+            for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
+                //verify that there are no existing id collisions in this input file
+                if(!idsThatAreAlreadyTaken.add(readGroup.getId()))
+                    throw new PicardException("Input file: " + header + " contains more than one RG with the same id (" + readGroup.getId() + ")");
+
+                readGroupsToProcess.add(new HeaderRecordAndFileHeader<SAMReadGroupRecord>(readGroup, header));
+            }
+            idsThatAreAlreadyTaken.clear();
+        }
+
+        final List<SAMReadGroupRecord> result = new LinkedList<SAMReadGroupRecord>();
+
+        hasReadGroupCollisions = mergeHeaderRecords(readGroupsToProcess, READ_GROUP_RECORD_FACTORY, idsThatAreAlreadyTaken, samReadGroupIdTranslation, result);
+
+        //sort the result list by record id
+        Collections.sort(result, RECORD_ID_COMPARATOR);
+
+        return result;
+    }
+
+
+    /**
+     * Checks to see if there are clashes where different readers are using the same program
+     * group IDs. If yes, then those IDs that collided are remapped.
+     *
+     * @param headers headers to combine
+     * @return new list of program groups constructed from all the readers
+     */
+    private List<SAMProgramRecord> mergeProgramGroups(final Collection<SAMFileHeader> headers) {
+
+        final List<SAMProgramRecord> overallResult = new LinkedList<SAMProgramRecord>();
+
+        //this Set will accumulate all SAMProgramRecord ids that have been encountered so far.
+        final HashSet<String> idsThatAreAlreadyTaken = new HashSet<String>();
+
+        //need to process all program groups
+        List<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsLeftToProcess = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
+        for (final SAMFileHeader header : headers) {
+            for (final SAMProgramRecord programGroup : header.getProgramRecords()) {
+                //verify that there are no existing id collisions in this input file
+                if(!idsThatAreAlreadyTaken.add(programGroup.getId()))
+                    throw new PicardException("Input file: " + header + " contains more than one PG with the same id (" + programGroup.getId() + ")");
+
+                programGroupsLeftToProcess.add(new HeaderRecordAndFileHeader<SAMProgramRecord>(programGroup, header));
+            }
+            idsThatAreAlreadyTaken.clear();
+        }
+
+        //A program group header (lets say ID=2 PN=B PP=1) may have a PP (previous program) attribute which chains it to
+        //another program group header (lets say ID=1 PN=A) to indicate that the given file was
+        //processed by program A followed by program B. These PP attributes potentially
+        //connect headers into one or more tree structures. Merging is done by
+        //first merging all headers that don't have PP attributes (eg. tree roots),
+        //then updating and merging all headers whose PPs point to the tree-root headers,
+        //and so on until all program group headers are processed.
+
+        //currentProgramGroups is the list of records to merge next. Start by merging the programGroups that don't have a PP attribute (eg. the tree roots).
+        List< HeaderRecordAndFileHeader<SAMProgramRecord> > currentProgramGroups = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
+        for(final Iterator<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
+            final HeaderRecordAndFileHeader<SAMProgramRecord> pair = programGroupsLeftToProcessIterator.next();
+            if(pair.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG) == null) {
+                programGroupsLeftToProcessIterator.remove();
+                currentProgramGroups.add(pair);
+            }
+        }
+
+        //merge currentProgramGroups
+        while(!currentProgramGroups.isEmpty())
+        {
+            final List<SAMProgramRecord> currentResult = new LinkedList<SAMProgramRecord>();
+
+            hasProgramGroupCollisions |= mergeHeaderRecords(currentProgramGroups, PROGRAM_RECORD_FACTORY, idsThatAreAlreadyTaken, samProgramGroupIdTranslation, currentResult);
+
+            //add currentResults to overallResults
+            overallResult.addAll(currentResult);
+
+            //apply the newly-computed id translations to currentProgramGroups and programGroupsLeftToProcess
+            currentProgramGroups = translateIds(currentProgramGroups, samProgramGroupIdTranslation, false);
+            programGroupsLeftToProcess = translateIds(programGroupsLeftToProcess, samProgramGroupIdTranslation, true);
+
+            //find all records in programGroupsLeftToProcess whose ppId points to a record that was just processed (eg. a record that's in currentProgramGroups),
+            //and move them to the list of programGroupsToProcessNext.
+            LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsToProcessNext = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
+            for(final Iterator<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroupsLeftToProcessIterator = programGroupsLeftToProcess.iterator(); programGroupsLeftToProcessIterator.hasNext(); ) {
+                final HeaderRecordAndFileHeader<SAMProgramRecord> pairLeftToProcess = programGroupsLeftToProcessIterator.next();
+                final Object ppIdOfRecordLeftToProcess = pairLeftToProcess.getHeaderRecord().getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
+                //find what currentProgramGroups this ppId points to (NOTE: they have to come from the same file)
+                for(final HeaderRecordAndFileHeader<SAMProgramRecord> justProcessedPair : currentProgramGroups) {
+                    String idJustProcessed = justProcessedPair.getHeaderRecord().getId();
+                    if(pairLeftToProcess.getFileHeader() == justProcessedPair.getFileHeader() && ppIdOfRecordLeftToProcess.equals(idJustProcessed)) {
+                        programGroupsLeftToProcessIterator.remove();
+                        programGroupsToProcessNext.add(pairLeftToProcess);
+                        break;
+                    }
+                }
+            }
+
+            currentProgramGroups = programGroupsToProcessNext;
+        }
+
+        //verify that all records were processed
+        if(!programGroupsLeftToProcess.isEmpty()) {
+            StringBuffer errorMsg = new StringBuffer(programGroupsLeftToProcess.size() + " program groups weren't processed. Do their PP ids point to existing PGs? \n");
+            for( final HeaderRecordAndFileHeader<SAMProgramRecord> pair : programGroupsLeftToProcess ) {
+                SAMProgramRecord record = pair.getHeaderRecord();
+                errorMsg.append("@PG ID:"+record.getProgramGroupId()+" PN:"+record.getProgramName()+" PP:"+record.getPreviousProgramGroupId() +"\n");
+            }
+            throw new PicardException(errorMsg.toString());
+        }
+
+        //sort the result list by record id
+        Collections.sort(overallResult, RECORD_ID_COMPARATOR);
+
+        return overallResult;
+    }
+
+
+    /**
+     * Utility method that takes a list of program groups and remaps all their
+     * ids (including ppIds if requested) using the given idTranslationTable.
+     *
+     * NOTE: when remapping, this method creates new SAMProgramRecords and
+     * doesn't mutate any records in the programGroups list.
+     *
+     * @param programGroups The program groups to translate.
+     * @param idTranslationTable The translation table.
+     * @param translatePpIds Whether ppIds should be translated as well.
+     *
+     * @return The list of translated records.
+     */
+    private List<HeaderRecordAndFileHeader<SAMProgramRecord>> translateIds(
+            List<HeaderRecordAndFileHeader<SAMProgramRecord>> programGroups,
+            Map<SAMFileHeader, Map<String, String>> idTranslationTable,
+            boolean translatePpIds) {
+
+        //go through programGroups and translate any IDs and PPs based on the idTranslationTable.
+        List<HeaderRecordAndFileHeader<SAMProgramRecord>> result = new LinkedList<HeaderRecordAndFileHeader<SAMProgramRecord>>();
+        for(final HeaderRecordAndFileHeader<SAMProgramRecord> pair : programGroups ) {
+            final SAMProgramRecord record = pair.getHeaderRecord();
+            final String id = record.getProgramGroupId();
+            final String ppId = (String) record.getAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG);
+
+            final SAMFileHeader header = pair.getFileHeader();
+            final Map<String, String> translations = idTranslationTable.get(header);
+
+            //see if one or both ids need to be translated
+            SAMProgramRecord translatedRecord = null;
+            if(translations != null)
+            {
+                String translatedId = translations.get( id );
+                String translatedPpId = translatePpIds ? translations.get( ppId ) : null;
+
+                boolean needToTranslateId = translatedId != null && !translatedId.equals(id);
+                boolean needToTranslatePpId = translatedPpId != null && !translatedPpId.equals(ppId);
+
+                if(needToTranslateId && needToTranslatePpId) {
+                    translatedRecord = new SAMProgramRecord(translatedId, record);
+                    translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
+                } else if(needToTranslateId) {
+                    translatedRecord = new SAMProgramRecord(translatedId, record);
+                } else if(needToTranslatePpId) {
+                    translatedRecord = new SAMProgramRecord(id, record);
+                    translatedRecord.setAttribute(SAMProgramRecord.PREVIOUS_PROGRAM_GROUP_ID_TAG, translatedPpId);
+                }
+            }
+
+            if(translatedRecord != null) {
+                result.add(new HeaderRecordAndFileHeader<SAMProgramRecord>(translatedRecord, header));
+            } else {
+                result.add(pair); //keep the original record
+            }
+        }
+
+        return result;
+    }
+
+
+    /**
+     * Utility method for merging a List of AbstractSAMHeaderRecords. If it finds
+     * records that have identical ids and attributes, it will collapse them
+     * into one record. If it finds records that have identical ids but
+     * non-identical attributes, this is treated as a collision. When collision happens,
+     * the records' ids are remapped, and an old-id to new-id mapping is added to the idTranslationTable.
+     *
+     * NOTE: Non-collided records also get recorded in the idTranslationTable as
+     * old-id to old-id. This way, an idTranslationTable lookup should never return null.
+     *
+     * @param headerRecords The header records to merge.
+     * @param headerRecordFactory Constructs a specific subclass of AbstractSAMHeaderRecord.
+     * @param idsThatAreAlreadyTaken If the id of a headerRecord matches an id in this set, it will be treated as a collision, and the headRecord's id will be remapped.
+     * @param idTranslationTable When records collide, their ids are remapped, and an old-id to new-id
+     *      mapping is added to the idTranslationTable. Non-collided records also get recorded in the idTranslationTable as
+     *      old-id to old-id. This way, an idTranslationTable lookup should never return null.
+     *
+     * @param result The list of merged header records.
+     *
+     * @return True if there were collisions.
+     */
+    private <RecordType extends AbstractSAMHeaderRecord> boolean mergeHeaderRecords(final List<HeaderRecordAndFileHeader<RecordType>> headerRecords, HeaderRecordFactory<RecordType> headerRecordFactory,
+            final HashSet<String> idsThatAreAlreadyTaken, Map<SAMFileHeader, Map<String, String>> idTranslationTable, List<RecordType> result) {
+
+        //The outer Map bins the header records by their ids. The nested Map further collapses
+        //header records which, in addition to having the same id, also have identical attributes.
+        //In other words, each key in the nested map represents one or more
+        //header records which have both identical ids and identical attributes. The List of
+        //SAMFileHeaders keeps track of which readers these header record(s) came from.
+        final Map<String, Map<RecordType, List<SAMFileHeader>>> idToRecord =
+            new HashMap<String, Map<RecordType, List<SAMFileHeader>>>();
+
+        //Populate the idToRecord and seenIds data structures
+        for (final HeaderRecordAndFileHeader<RecordType> pair : headerRecords) {
+            final RecordType record = pair.getHeaderRecord();
+            final SAMFileHeader header = pair.getFileHeader();
+            final String recordId = record.getId();
+            Map<RecordType, List<SAMFileHeader>> recordsWithSameId = idToRecord.get(recordId);
+            if(recordsWithSameId == null) {
+                recordsWithSameId = new LinkedHashMap<RecordType, List<SAMFileHeader>>();
+                idToRecord.put(recordId, recordsWithSameId);
+            }
+
+            List<SAMFileHeader> fileHeaders = recordsWithSameId.get(record);
+            if(fileHeaders == null) {
+                fileHeaders = new LinkedList<SAMFileHeader>();
+                recordsWithSameId.put(record, fileHeaders);
+            }
+
+            fileHeaders.add(header);
+        }
+
+        //Resolve any collisions between header records by remapping their ids.
+        boolean hasCollisions = false;
+        for (final Map.Entry<String, Map<RecordType, List<SAMFileHeader>>> entry : idToRecord.entrySet() )
+        {
+            final String recordId = entry.getKey();
+            final Map<RecordType, List<SAMFileHeader>> recordsWithSameId = entry.getValue();
+
+
+            for( Map.Entry<RecordType, List<SAMFileHeader>> recordWithUniqueAttr : recordsWithSameId.entrySet()) {
+                final RecordType record = recordWithUniqueAttr.getKey();
+                final List<SAMFileHeader> fileHeaders = recordWithUniqueAttr.getValue();
+
+                String newId;
+                if(!idsThatAreAlreadyTaken.contains(recordId)) {
+                    //don't remap 1st record. If there are more records
+                    //with this id, they will be remapped in the 'else'.
+                    newId = recordId;
+                    idsThatAreAlreadyTaken.add(recordId);
+                } else {
+                    //there is more than one record with this id.
+                    hasCollisions = true;
+
+                    //find a unique newId for this record
+                    int idx=1;
+                    while(idsThatAreAlreadyTaken.contains(newId = recordId + "." + Integer.toString(idx++)))
+                        ;
+
+                    idsThatAreAlreadyTaken.add( newId );
+                }
+
+                for(SAMFileHeader fileHeader : fileHeaders) {
+                    Map<String, String> readerTranslationTable = idTranslationTable.get(fileHeader);
+                    if(readerTranslationTable == null) {
+                        readerTranslationTable = new HashMap<String, String>();
+                        idTranslationTable.put(fileHeader, readerTranslationTable);
+                    }
+                    readerTranslationTable.put(recordId, newId);
+                }
+
+                result.add( headerRecordFactory.createRecord(newId, record) );
+            }
+        }
+
+        return hasCollisions;
+    }
+
+
+    /**
+     * Get the sequences off the SAMFileHeader.  Throws runtime exception if the sequence
+     * are different from one another.
+     *
+     * @param headers headers to pull sequences from
+     * @return sequences from files.  Each file should have the same sequence
+     */
+    private SAMSequenceDictionary getSequenceDictionary(final Collection<SAMFileHeader> headers) {
+        SAMSequenceDictionary sequences = null;
+        for (final SAMFileHeader header : headers) {
+
+            if (sequences == null) {
+                sequences = header.getSequenceDictionary();
+            }
+            else {
+                final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
+                SequenceUtil.assertSequenceDictionariesEqual(sequences, currentSequences);
+            }
+        }
+
+        return sequences;
+    }
+
+    /**
+     * Get the sequences from the SAMFileHeader, and merge the resulting sequence dictionaries.
+     *
+     * @param headers headers to pull sequences from
+     * @return sequences from files.  Each file should have the same sequence
+     */
+    private SAMSequenceDictionary mergeSequenceDictionaries(final Collection<SAMFileHeader> headers) {
+        SAMSequenceDictionary sequences = new SAMSequenceDictionary();
+        for (final SAMFileHeader header : headers) {
+            final SAMSequenceDictionary currentSequences = header.getSequenceDictionary();
+            sequences = mergeSequences(sequences, currentSequences);
+        }
+        // second pass, make a map of the original seqeunce id -> new sequence id
+        createSequenceMapping(headers, sequences);
+        return sequences;
+    }
+
+    /**
+     * They've asked to merge the sequence headers.  What we support right now is finding the sequence name superset.
+     *
+     * @param mergeIntoDict the result of merging so far.  All SAMSequenceRecords in here have been cloned from the originals.
+     * @param mergeFromDict A new sequence dictionary to merge into mergeIntoDict.
+     * @return A new sequence dictionary that resulting from merging the two inputs.
+     */
+    private SAMSequenceDictionary mergeSequences(SAMSequenceDictionary mergeIntoDict, SAMSequenceDictionary mergeFromDict) {
+
+        // a place to hold the sequences that we haven't found a home for, in the order the appear in mergeFromDict.
+        LinkedList<SAMSequenceRecord> holder = new LinkedList<SAMSequenceRecord>();
+
+        // Return value will be created from this.
+        LinkedList<SAMSequenceRecord> resultingDict = new LinkedList<SAMSequenceRecord>();
+        for (final SAMSequenceRecord sequenceRecord : mergeIntoDict.getSequences()) {
+            resultingDict.add(sequenceRecord);
+        }
+
+        // Index into resultingDict of previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
+        int prevloc = -1;
+        // Previous SAMSequenceRecord from mergeFromDict that already existed in mergeIntoDict.
+        SAMSequenceRecord previouslyMerged = null;
+
+        for (SAMSequenceRecord sequenceRecord : mergeFromDict.getSequences()) {
+            // Does it already exist in resultingDict?
+            int loc = getIndexOfSequenceName(resultingDict, sequenceRecord.getSequenceName());
+            if (loc == -1) {
+                // If doesn't already exist in resultingDict, save it an decide where to insert it later.
+                holder.add(sequenceRecord.clone());
+            } else if (prevloc > loc) {
+                // If sequenceRecord already exists in resultingDict, but prior to the previous one
+                // from mergeIntoDict that already existed, cannot merge.
+                throw new PicardException("Cannot merge sequence dictionaries because sequence " +
+                        sequenceRecord.getSequenceName() + " and " + previouslyMerged.getSequenceName() +
+                " are in different orders in two input sequence dictionaries.");
+            } else {
+                // Since sequenceRecord already exists in resultingDict, don't need to add it.
+                // Add in all the sequences prior to it that have been held in holder.
+                resultingDict.addAll(loc, holder);
+                // Remember the index of sequenceRecord so can check for merge imcompatibility.
+                prevloc = loc + holder.size();
+                previouslyMerged = sequenceRecord;
+                holder.clear();
+            }
+        }
+        // Append anything left in holder.
+        if (holder.size() != 0) {
+            resultingDict.addAll(holder);
+        }
+        return new SAMSequenceDictionary(resultingDict);
+    }
+
+    /**
+     * Find sequence in list.
+     * @param list List to search for the sequence name.
+     * @param sequenceName Name to search for.
+     * @return Index of SAMSequenceRecord with the given name in list, or -1 if not found.
+     */
+    private static int getIndexOfSequenceName(final List<SAMSequenceRecord> list, final String sequenceName) {
+        for (int i = 0; i < list.size(); ++i) {
+            if (list.get(i).getSequenceName().equals(sequenceName)) {
+                return i;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * create the sequence mapping.  This map is used to convert the unmerged header sequence ID's to the merged
+     * list of sequence id's.
+     * @param headers the collections of headers.
+     * @param masterDictionary the superset dictionary we've created.
+     */
+    private void createSequenceMapping(final Collection<SAMFileHeader> headers, SAMSequenceDictionary masterDictionary) {
+        LinkedList<String> resultingDictStr = new LinkedList<String>();
+        for (SAMSequenceRecord r : masterDictionary.getSequences()) {
+            resultingDictStr.add(r.getSequenceName());
+        }
+        for (final SAMFileHeader header : headers) {
+            Map<Integer, Integer> seqMap = new HashMap<Integer, Integer>();
+            SAMSequenceDictionary dict = header.getSequenceDictionary();
+            for (SAMSequenceRecord rec : dict.getSequences()) {
+                seqMap.put(rec.getSequenceIndex(), resultingDictStr.indexOf(rec.getSequenceName()));
+            }
+            this.samSeqDictionaryIdTranslationViaHeader.put(header, seqMap);
+        }
+    }
+
+
+
+    /**
+     * Returns the read group id that should be used for the input read and RG id.
+     *
+     * @deprecated replaced by getReadGroupId(SAMFileHeader, String)
+     * */
+    public String getReadGroupId(final SAMFileReader reader, final String originalReadGroupId) {
+        return getReadGroupId(reader.getFileHeader(), originalReadGroupId);
+    }
+
+    /** Returns the read group id that should be used for the input read and RG id. */
+    public String getReadGroupId(final SAMFileHeader header, final String originalReadGroupId) {
+        return this.samReadGroupIdTranslation.get(header).get(originalReadGroupId);
+    }
+
+    /**
+     * @param reader one of the input files
+     * @param originalProgramGroupId a program group ID from the above input file
+     * @return new ID from the merged list of program groups in the output file
+     * @deprecated replaced by getProgramGroupId(SAMFileHeader, String)
+     */
+    public String getProgramGroupId(final SAMFileReader reader, final String originalProgramGroupId) {
+        return getProgramGroupId(reader.getFileHeader(), originalProgramGroupId);
+    }
+
+    /**
+     * @param header one of the input headers
+     * @param originalProgramGroupId a program group ID from the above input file
+     * @return new ID from the merged list of program groups in the output file
+     */
+    public String getProgramGroupId(final SAMFileHeader header, final String originalProgramGroupId) {
+        return this.samProgramGroupIdTranslation.get(header).get(originalProgramGroupId);
+    }
+
+    /** Returns true if there are read group duplicates within the merged headers. */
+    public boolean hasReadGroupCollisions() {
+        return this.hasReadGroupCollisions;
+    }
+
+    /** Returns true if there are program group duplicates within the merged headers. */
+    public boolean hasProgramGroupCollisions() {
+        return hasProgramGroupCollisions;
+    }
+
+    /** @return if we've merged the sequence dictionaries, return true */
+    public boolean hasMergedSequenceDictionary() {
+        return hasMergedSequenceDictionary;
+    }
+
+    /** Returns the merged header that should be written to any output merged file. */
+    public SAMFileHeader getMergedHeader() {
+        return this.mergedHeader;
+    }
+
+    /** Returns the collection of readers that this header merger is working with. May return null.
+     * @deprecated replaced by getHeaders()
+     */
+    public Collection<SAMFileReader> getReaders() {
+        return this.readers;
+    }
+
+    /** Returns the collection of readers that this header merger is working with.
+     */
+    public Collection<SAMFileHeader> getHeaders() {
+        return this.headers;
+    }
+
+    /**
+     * Tells whether this header merger contains a given SAM file header.  Note that header presence
+     * is confirmed / blocked by == equality, rather than actually testing SAMFileHeader.equals(), for
+     * reasons of performance.
+     * @param header header to check for.
+     * @return True if the header exists in this HeaderMerger.  False otherwise.
+     */
+    boolean containsHeader(SAMFileHeader header) {
+        for(SAMFileHeader headerMergerHeader: headers) {
+            if(headerMergerHeader == header)
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * returns the new mapping for a specified reader, given it's old sequence index
+     * @param reader the reader
+     * @param oldReferenceSequenceIndex the old sequence (also called reference) index
+     * @return the new index value
+     * @deprecated replaced by getMergedSequenceIndex(SAMFileHeader, Integer)
+     */
+    public Integer getMergedSequenceIndex(SAMFileReader reader, Integer oldReferenceSequenceIndex) {
+        return this.getMergedSequenceIndex(reader.getFileHeader(), oldReferenceSequenceIndex);
+    }
+
+    /**
+     * Another mechanism for getting the new sequence index, for situations in which the reader is not available.
+     * Note that if the SAMRecord has already had its header replaced with the merged header, this won't work.
+     * @param header The original header for the input record in question.
+     * @param oldReferenceSequenceIndex The original sequence index.
+     * @return the new index value that is compatible with the merged sequence index.
+     */
+    public Integer getMergedSequenceIndex(final SAMFileHeader header, Integer oldReferenceSequenceIndex) {
+        final Map<Integer, Integer> mapping = this.samSeqDictionaryIdTranslationViaHeader.get(header);
+        if (mapping == null) {
+            throw new PicardException("No sequence dictionary mapping available for header: " + header);
+        }
+
+        final Integer newIndex = mapping.get(oldReferenceSequenceIndex);
+        if (newIndex == null) {
+            throw new PicardException("No mapping for reference index " + oldReferenceSequenceIndex + " from header: " + header);
+        }
+
+        return newIndex;
+    }
+
+
+    /**
+     * Implementations of this interface are used by mergeHeaderRecords(..) to instantiate
+     * specific subclasses of AbstractSAMHeaderRecord.
+     */
+    private static interface HeaderRecordFactory<RecordType extends AbstractSAMHeaderRecord> {
+
+       /**
+        * Constructs a new instance of RecordType.
+        * @param id The id of the new record.
+        * @param srcRecord Except for the id, the new record will be a copy of this source record.
+        */
+        public RecordType createRecord(final String id, RecordType srcRecord);
+    }
+
+    /**
+     * Struct that groups together a subclass of AbstractSAMHeaderRecord with the
+     * SAMFileHeader that it came from.
+     */
+    private static class HeaderRecordAndFileHeader<RecordType extends AbstractSAMHeaderRecord> {
+        private RecordType headerRecord;
+        private SAMFileHeader samFileHeader;
+
+        public HeaderRecordAndFileHeader(RecordType headerRecord, SAMFileHeader samFileHeader) {
+            this.headerRecord = headerRecord;
+            this.samFileHeader = samFileHeader;
+        }
+
+        public RecordType getHeaderRecord() {
+            return headerRecord;
+        }
+        public SAMFileHeader getFileHeader() {
+            return samFileHeader;
+        }
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@ -41,6 +41,7 @@ import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.SimpleTimer;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.baq.BAQSamIterator;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -51,6 +52,7 @@ import java.io.File;
 import java.lang.reflect.InvocationTargetException;
 import java.lang.reflect.Method;
 import java.util.*;
+import java.util.concurrent.*;

 /**
 * User: aaron
@ -95,6 +97,11 @@ public class SAMDataSource {
     */
    private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();

+    /**
+     * Cached representation of the merged header used to generate a merging iterator.
+     */
+    private final SamFileHeaderMerger headerMerger;
+
    /**
     * The merged header.
     */
@ -199,7 +206,7 @@ public class SAMDataSource {
                BAQ.QualityMode.DONT_MODIFY,
                null, // no BAQ
                (byte) -1);
-        }
+    }

    /**
     * Create a new SAM data source given the supplied read metadata.
@ -252,11 +259,10 @@ public class SAMDataSource {
        validationStringency = strictness;
        if(readBufferSize != null)
            ReadShard.setReadBufferSize(readBufferSize);
-
-        for (SAMReaderID readerID : samFiles) {
-            if (!readerID.samFile.canRead())
-                throw new UserException.CouldNotReadInputFile(readerID.samFile,"file is not present or user does not have appropriate permissions.  " +
-                                                                               "Please check that the file is present and readable and try again.");
+        else {
+            // Choose a sensible default for the read buffer size.  For the moment, we're picking 1000 reads per BAM per shard (which effectively
+            // will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once.
+            ReadShard.setReadBufferSize(Math.min(1000*samFiles.size(),250000));
        }

        resourcePool = new SAMResourcePool(Integer.MAX_VALUE);
@ -264,6 +270,10 @@ public class SAMDataSource {

        // Determine the sort order.
        for(SAMReaderID readerID: readerIDs) {
+            if (! readerID.samFile.canRead() )
+                throw new UserException.CouldNotReadInputFile(readerID.samFile,"file is not present or user does not have appropriate permissions.  " +
+                        "Please check that the file is present and readable and try again.");
+
            // Get the sort order, forcing it to coordinate if unsorted.
            SAMFileReader reader = readers.getReader(readerID);
            SAMFileHeader header = reader.getFileHeader();
@ -287,7 +297,7 @@ public class SAMDataSource {

        initializeReaderPositions(readers);

-        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
+        headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
        mergedHeader = headerMerger.getMergedHeader();
        hasReadGroupCollisions = headerMerger.hasReadGroupCollisions();

@ -474,10 +484,13 @@ public class SAMDataSource {
        // Cache the most recently viewed read so that we can check whether we've reached the end of a pair.
        SAMRecord read = null;

+        Map<SAMFileReader,GATKBAMFileSpan> positionUpdates = new IdentityHashMap<SAMFileReader,GATKBAMFileSpan>();
+
        CloseableIterator<SAMRecord> iterator = getIterator(readers,shard,sortOrder == SAMFileHeader.SortOrder.coordinate);
        while(!shard.isBufferFull() && iterator.hasNext()) {
            read = iterator.next();
-            addReadToBufferingShard(shard,getReaderID(readers,read),read);
+            shard.addRead(read);
+            noteFilePositionUpdate(positionUpdates,read);
        }

        // If the reads are sorted in queryname order, ensure that all reads
@ -487,11 +500,21 @@ public class SAMDataSource {
                SAMRecord nextRead = iterator.next();
                if(read == null || !read.getReadName().equals(nextRead.getReadName()))
                    break;
-                addReadToBufferingShard(shard,getReaderID(readers,nextRead),nextRead);
+                shard.addRead(nextRead);
+                noteFilePositionUpdate(positionUpdates,nextRead);
            }
        }

        iterator.close();
+
+        // Make the updates specified by the reader.
+        for(Map.Entry<SAMFileReader,GATKBAMFileSpan> positionUpdate: positionUpdates.entrySet())
+            readerPositions.put(readers.getReaderID(positionUpdate.getKey()),positionUpdate.getValue());
+    }
+
+    private void noteFilePositionUpdate(Map<SAMFileReader,GATKBAMFileSpan> positionMapping, SAMRecord read) {
+        GATKBAMFileSpan endChunk = new GATKBAMFileSpan(read.getFileSource().getFilePointer().getContentsFollowing());
+        positionMapping.put(read.getFileSource().getReader(),endChunk);
    }

    public StingSAMIterator seek(Shard shard) {
@ -535,8 +558,6 @@ public class SAMDataSource {
     * @return An iterator over the selected data.
     */
    private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
-        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
-
        // Set up merging to dynamically merge together multiple BAMs.
        MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);

@ -571,18 +592,6 @@ public class SAMDataSource {
                readProperties.defaultBaseQualities());
    }

-    /**
-     * Adds this read to the given shard.
-     * @param shard The shard to which to add the read.
-     * @param id The id of the given reader.
-     * @param read The read to add to the shard.
-     */
-    private void addReadToBufferingShard(Shard shard,SAMReaderID id,SAMRecord read) {
-        GATKBAMFileSpan endChunk = new GATKBAMFileSpan(read.getFileSource().getFilePointer().getContentsFollowing());
-        shard.addRead(read);
-        readerPositions.put(id,endChunk);
-    }
-
    /**
     * Filter reads based on user-specified criteria.
     *
@ -711,29 +720,68 @@ public class SAMDataSource {
         * @param validationStringency validation stringency.
         */
        public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
+            final int N_THREADS = 8;
            int totalNumberOfFiles = readerIDs.size();
            int readerNumber = 1;
-            for(SAMReaderID readerID: readerIDs) {
-                File indexFile = findIndexFile(readerID.samFile);

-                SAMFileReader reader = null;
-
-                if(threadAllocation.getNumIOThreads() > 0) {
-                    BlockInputStream blockInputStream = new BlockInputStream(dispatcher,readerID,false);
-                    reader = new SAMFileReader(blockInputStream,indexFile,false);
-                    inputStreams.put(readerID,blockInputStream);
-                }
-                else
-                    reader = new SAMFileReader(readerID.samFile,indexFile,false);
-                reader.setSAMRecordFactory(factory);
-
-                reader.enableFileSource(true);
-                reader.setValidationStringency(validationStringency);
-
-                logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles,  readerID.samFile));
-
-                readers.put(readerID,reader);
+            ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
+            final List<ReaderInitializer> inits = new ArrayList<ReaderInitializer>(totalNumberOfFiles);
+            Queue<Future<ReaderInitializer>> futures = new LinkedList<Future<ReaderInitializer>>();
+            for (SAMReaderID readerID: readerIDs) {
+                logger.debug("Enqueuing for initialization: " + readerID.samFile);
+                final ReaderInitializer init = new ReaderInitializer(readerID);
+                inits.add(init);
+                futures.add(executor.submit(init));
            }
+
+            final SimpleTimer timer = new SimpleTimer();
+            try {
+                final int MAX_WAIT = 30 * 1000;
+                final int MIN_WAIT = 1 * 1000;
+
+                timer.start();
+                while ( ! futures.isEmpty() ) {
+                    final int prevSize = futures.size();
+                    final double waitTime = prevSize * (0.5 / N_THREADS); // about 0.5 seconds to load each file
+                    final int waitTimeInMS = Math.min(MAX_WAIT, Math.max((int) (waitTime * 1000), MIN_WAIT));
+                    Thread.sleep(waitTimeInMS);
+
+                    Queue<Future<ReaderInitializer>> pending = new LinkedList<Future<ReaderInitializer>>();
+                    for ( final Future<ReaderInitializer> initFuture : futures ) {
+                        if ( initFuture.isDone() ) {
+                            final ReaderInitializer init = initFuture.get();
+                            if (threadAllocation.getNumIOThreads() > 0) {
+                                inputStreams.put(init.readerID, init.blockInputStream); // get from initializer
+                            }
+                            logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, init.readerID));
+                            readers.put(init.readerID, init.reader);
+                        } else {
+                            pending.add(initFuture);
+                        }
+                    }
+
+                    final int pendingSize = pending.size();
+                    final int nExecutedInTick = prevSize - pendingSize;
+                    final int nExecutedTotal = totalNumberOfFiles - pendingSize;
+                    final double totalTimeInSeconds = timer.getElapsedTime();
+                    final double nTasksPerSecond = nExecutedTotal / (1.0*totalTimeInSeconds);
+                    final int nRemaining = pendingSize;
+                    final double estTimeToComplete = pendingSize / nTasksPerSecond;
+                    logger.info(String.format("Init %d BAMs in last %d s, %d of %d in %.2f s / %.2f m (%.2f tasks/s).  %d remaining with est. completion in %.2f s / %.2f m",
+                            nExecutedInTick, (int)(waitTimeInMS / 1000.0),
+                            nExecutedTotal, totalNumberOfFiles, totalTimeInSeconds, totalTimeInSeconds / 60, nTasksPerSecond,
+                            nRemaining, estTimeToComplete, estTimeToComplete / 60));
+
+                    futures = pending;
+                }
+            } catch ( InterruptedException e ) {
+                throw new ReviewedStingException("Interrupted SAMReader initialization", e);
+            } catch ( ExecutionException e ) {
+                throw new ReviewedStingException("Execution exception during SAMReader initialization", e);
+            }
+
+            logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime()));
+            executor.shutdown();
        }

        /**
@ -806,6 +854,30 @@ public class SAMDataSource {
        }
    }

+    class ReaderInitializer implements Callable<ReaderInitializer> {
+        final SAMReaderID readerID;
+        BlockInputStream blockInputStream = null;
+        SAMFileReader reader;
+
+        public ReaderInitializer(final SAMReaderID readerID) {
+            this.readerID = readerID;
+        }
+
+        public ReaderInitializer call() {
+            final File indexFile = findIndexFile(readerID.samFile);
+            if (threadAllocation.getNumIOThreads() > 0) {
+                blockInputStream = new BlockInputStream(dispatcher,readerID,false);
+                reader = new SAMFileReader(blockInputStream,indexFile,false);
+            }
+            else
+                reader = new SAMFileReader(readerID.samFile,indexFile,false);
+            reader.setSAMRecordFactory(factory);
+            reader.enableFileSource(true);
+            reader.setValidationStringency(validationStringency);
+            return this;
+        }
+    }
+
    private class ReleasingIterator implements StingSAMIterator {
        /**
         * The resource acting as the source of the data.
@ -988,12 +1060,12 @@ public class SAMDataSource {
            return
                    // Read ends on a later contig, or...
                    read.getReferenceIndex() > intervalContigIndices[currentBound] ||
-                    // Read ends of this contig...
-                    (read.getReferenceIndex() == intervalContigIndices[currentBound] &&
-                            // either after this location, or...
-                            (read.getAlignmentEnd() >= intervalStarts[currentBound] ||
-                            // read is unmapped but positioned and alignment start is on or after this start point.
-                            (read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
+                            // Read ends of this contig...
+                            (read.getReferenceIndex() == intervalContigIndices[currentBound] &&
+                                    // either after this location, or...
+                                    (read.getAlignmentEnd() >= intervalStarts[currentBound] ||
+                                            // read is unmapped but positioned and alignment start is on or after this start point.
+                                            (read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
        }

        /**
@ -1005,8 +1077,8 @@ public class SAMDataSource {
            return
                    // Read starts on a prior contig, or...
                    read.getReferenceIndex() < intervalContigIndices[currentBound] ||
-                    // Read starts on this contig and the alignment start is registered before this end point.
-                   (read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
+                            // Read starts on this contig and the alignment start is registered before this end point.
+                            (read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
        }
    }

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java
@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;

 import java.io.PrintStream;
 import java.util.List;
@ -65,11 +64,9 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
     * @param GLs                               genotype likelihoods
     * @param Alleles                           Alleles corresponding to GLs
     * @param log10AlleleFrequencyPriors        priors
-     * @param log10AlleleFrequencyLikelihoods   array (pre-allocated) to store likelihoods results
-     * @param log10AlleleFrequencyPosteriors    array (pre-allocated) to store posteriors results
+     * @param result                            (pre-allocated) object to store likelihoods results
     */
    protected abstract void getLog10PNonRef(GenotypesContext GLs,  List<Allele> Alleles,
                                            double[][] log10AlleleFrequencyPriors,
-                                            double[][] log10AlleleFrequencyLikelihoods,
-                                            double[][] log10AlleleFrequencyPosteriors);
+                                            AlleleFrequencyCalculationResult result);
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationResult.java
@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2010.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.genotyper;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: ebanks
+ * Date: Dec 14, 2011
+ *
+ * Useful helper class to communicate the results of the allele frequency calculation
+ */
+public class AlleleFrequencyCalculationResult {
+
+    final double[][] log10AlleleFrequencyLikelihoods;
+    final double[][] log10AlleleFrequencyPosteriors;
+
+    AlleleFrequencyCalculationResult(int maxAltAlleles, int numChr) {
+        log10AlleleFrequencyLikelihoods = new double[maxAltAlleles][numChr+1];
+        log10AlleleFrequencyPosteriors = new double[maxAltAlleles][numChr+1];
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
@ -47,16 +47,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
    public void getLog10PNonRef(final GenotypesContext GLs,
                                final List<Allele> alleles,
                                final double[][] log10AlleleFrequencyPriors,
-                                final double[][] log10AlleleFrequencyLikelihoods,
-                                final double[][] log10AlleleFrequencyPosteriors) {
+                                final AlleleFrequencyCalculationResult result) {
        final int numAlleles = alleles.size();

        //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
-        linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
+        linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, result, false);
    }

    private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
-        ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();  // TODO -- initialize with size of GLs
+        ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>(GLs.size());

        genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
        for ( Genotype sample : GLs.iterateInSampleNameOrder() ) {
@ -196,10 +195,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
    public static void linearExactMultiAllelic(final GenotypesContext GLs,
                                               final int numAlternateAlleles,
                                               final double[][] log10AlleleFrequencyPriors,
-                                               final double[][] log10AlleleFrequencyLikelihoods,
-                                               final double[][] log10AlleleFrequencyPosteriors,
+                                               final AlleleFrequencyCalculationResult result,
                                               final boolean preserveData) {

+        // make sure the PL cache has been initialized
+        if ( UnifiedGenotyperEngine.PLIndexToAlleleIndex == null )
+            UnifiedGenotyperEngine.calculatePLcache(5);
+
        final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
        final int numSamples = genotypeLikelihoods.size()-1;
        final int numChr = 2*numSamples;
@ -221,7 +223,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        while ( !ACqueue.isEmpty() ) {
            // compute log10Likelihoods
            final ExactACset set = ACqueue.remove();
-            final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
+            final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);

            // adjust max likelihood seen if needed
            maxLog10L = Math.max(maxLog10L, log10LofKs);
@ -236,14 +238,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                                                           final Queue<ExactACset> ACqueue,
                                                           final HashMap<ExactACcounts, ExactACset> indexesToACset,
                                                           final double[][] log10AlleleFrequencyPriors,
-                                                           final double[][] log10AlleleFrequencyLikelihoods,
-                                                           final double[][] log10AlleleFrequencyPosteriors) {
+                                                           final AlleleFrequencyCalculationResult result) {

        if ( DEBUG )
            System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);

        // compute the log10Likelihoods
-        computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
+        computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, result);

        // clean up memory
        if ( !preserveData ) {
@ -349,8 +350,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                                    final ArrayList<double[]> genotypeLikelihoods,
                                    final HashMap<ExactACcounts, ExactACset> indexesToACset,
                                    final double[][] log10AlleleFrequencyPriors,
-                                    final double[][] log10AlleleFrequencyLikelihoods,
-                                    final double[][] log10AlleleFrequencyPosteriors) {
+                                    final AlleleFrequencyCalculationResult result) {

        set.log10Likelihoods[0] = 0.0; // the zero case
        final int totalK = set.getACsum();
@ -410,19 +410,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
        for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
            int AC = set.ACcounts.getCounts()[i];
-            log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
+            result.log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyLikelihoods[i][AC], log10LofK);

            // for k=0 we still want to use theta
            final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
-            log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
+            result.log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(result.log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
        }
    }

    private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) {
-        // todo -- arent' there a small number of fixed values that this function can adopt?
-        // todo -- at a minimum it'd be good to partially compute some of these in ACCounts for performance
-        // todo -- need to cache PLIndex -> two alleles, compute looping over each PLIndex.  Note all other operations are efficient
-        // todo -- this can be computed once at the start of the all operations

        // the closed form representation generalized for multiple alleles is as follows:
        // AA: (2j - totalK) * (2j - totalK - 1)
@ -438,25 +434,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        if ( PLindex <= numAltAlleles )
            return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK];

-        int subtractor = numAltAlleles+1;
-        int subtractions = 0;
-        do {
-            PLindex -= subtractor;
-            subtractor--;
-            subtractions++;
-        }
-        while ( PLindex >= subtractor );
+        // find the 2 alternate alleles that are represented by this PL index
+        int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[numAltAlleles][PLindex];

-        final int k_i = ACcounts[subtractions-1];
+        final int k_i = ACcounts[alleles[0]-1];  // subtract one because ACcounts doesn't consider the reference allele

        // the hom var case (e.g. BB, CC, DD)
        final double coeff;
-        if ( PLindex == 0 ) {
+        if ( alleles[0] == alleles[1] ) {
            coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1];
        }
        // the het non-ref case (e.g. BC, BD, CD)
        else {
-            final int k_j = ACcounts[subtractions+PLindex-1];
+            final int k_j = ACcounts[alleles[1]-1];
            coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j];
        }

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.exceptions.StingException;
@ -95,7 +96,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
            determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup);

            // how many alternate alleles are we using?
-            int alleleCounter = countSetBits(basesToUse);
+            int alleleCounter = Utils.countSetBits(basesToUse);

            // if there are no non-ref alleles...
            if ( alleleCounter == 0 ) {
@ -109,7 +110,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
        }

        // create the alternate alleles and the allele ordering (the ordering is crucial for the GLs)
-        final int numAltAlleles = countSetBits(basesToUse);
+        final int numAltAlleles = Utils.countSetBits(basesToUse);
        final int[] alleleOrdering = new int[numAltAlleles + 1];
        alleleOrdering[0] = indexOfRefBase;
        int alleleOrderingIndex = 1;
@ -161,15 +162,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
        return builder.genotypes(genotypes).make();
    }

-    private int countSetBits(boolean[] array) {
-        int counter = 0;
-        for ( int i = 0; i < array.length; i++ ) {
-            if ( array[i] )
-                counter++;
-        }
-        return counter;
-    }
-
    // fills in the allelesToUse array
    protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map<String, AlignmentContext> contexts, boolean useBAQedPileup) {
        int[] qualCounts = new int[4];
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -75,14 +75,13 @@ public class UnifiedGenotyperEngine {
    // the model used for calculating p(non-ref)
    private ThreadLocal<AlleleFrequencyCalculationModel> afcm = new ThreadLocal<AlleleFrequencyCalculationModel>();

+    // the allele frequency likelihoods (allocated once as an optimization)
+    private ThreadLocal<AlleleFrequencyCalculationResult> alleleFrequencyCalculationResult = new ThreadLocal<AlleleFrequencyCalculationResult>();
+
    // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything
    private final double[][] log10AlleleFrequencyPriorsSNPs;
    private final double[][] log10AlleleFrequencyPriorsIndels;

-    // the allele frequency likelihoods (allocated once as an optimization)
-    private ThreadLocal<double[][]> log10AlleleFrequencyLikelihoods = new ThreadLocal<double[][]>();
-    private ThreadLocal<double[][]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[][]>();
-
    // the priors object
    private final GenotypePriors genotypePriorsSNPs;
    private final GenotypePriors genotypePriorsIndels;
@ -103,6 +102,9 @@ public class UnifiedGenotyperEngine {
    private final GenomeLocParser genomeLocParser;
    private final boolean BAQEnabledOnCMDLine;

+    // a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
+    // the representation is int[number of alternate alleles][PL index][pair of allele indexes (where reference = 0)]
+    protected static int[][][] PLIndexToAlleleIndex;


    // ---------------------------------------------------------------------------------------------------------
@ -136,6 +138,27 @@ public class UnifiedGenotyperEngine {
        genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL);
        
        filter.add(LOW_QUAL_FILTER_NAME);
+        calculatePLcache(UAC.MAX_ALTERNATE_ALLELES);
+    }
+
+    protected static void calculatePLcache(int maxAltAlleles) {
+        PLIndexToAlleleIndex = new int[maxAltAlleles+1][][];
+        PLIndexToAlleleIndex[0] = new int[][]{ new int[]{0, 0} };
+        int numLikelihoods = 1;
+
+        // for each count of alternate alleles
+        for ( int altAlleles = 1; altAlleles <= maxAltAlleles; altAlleles++ ) {
+            numLikelihoods += altAlleles + 1;
+            PLIndexToAlleleIndex[altAlleles] = new int[numLikelihoods][];
+            int PLindex = 0;
+
+            // for all possible combinations of the 2 alt alleles
+            for ( int allele1 = 0; allele1 <= altAlleles; allele1++ ) {
+                for ( int allele2 = allele1; allele2 <= altAlleles; allele2++ ) {
+                    PLIndexToAlleleIndex[altAlleles][PLindex++] = new int[]{ allele1, allele2 };
+                }
+            }
+        }
    }

    /**
@ -264,9 +287,8 @@ public class UnifiedGenotyperEngine {

        // initialize the data for this thread if that hasn't been done yet
        if ( afcm.get() == null ) {
-            log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
-            log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
            afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
+            alleleFrequencyCalculationResult.set(new AlleleFrequencyCalculationResult(UAC.MAX_ALTERNATE_ALLELES, N));
        }

        // don't try to genotype too many alternate alleles
@ -285,9 +307,9 @@ public class UnifiedGenotyperEngine {
        }

        // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
-        clearAFarray(log10AlleleFrequencyLikelihoods.get());
-        clearAFarray(log10AlleleFrequencyPosteriors.get());
-        afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
+        clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
+        clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
+        afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());

        // is the most likely frequency conformation AC=0 for all alternate alleles?
        boolean bestGuessIsRef = true;
@ -299,7 +321,7 @@ public class UnifiedGenotyperEngine {
        // determine which alternate alleles have AF>0
        boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()];
        for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
-            int indexOfBestAC = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]);
+            int indexOfBestAC = MathUtils.maxElementIndex(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]);

            // if the most likely AC is not 0, then this is a good alternate allele to use
            if ( indexOfBestAC != 0 ) {
@ -320,7 +342,7 @@ public class UnifiedGenotyperEngine {

        // calculate p(f>0)
        // TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs
-        double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[indexOfHighestAlt]);
+        double[] normalizedPosteriors = MathUtils.normalizeFromLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[indexOfHighestAlt]);
        double sum = 0.0;
        for (int i = 1; i <= N; i++)
            sum += normalizedPosteriors[i];
@ -330,15 +352,15 @@ public class UnifiedGenotyperEngine {
        if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
            phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]);
            if ( Double.isInfinite(phredScaledConfidence) )
-                phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0];
+                phredScaledConfidence = -10.0 * alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
        } else {
            phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF);
            if ( Double.isInfinite(phredScaledConfidence) ) {
                sum = 0.0;
                for (int i = 1; i <= N; i++) {
-                    if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
+                    if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED )
                        break;
-                    sum += log10AlleleFrequencyPosteriors.get()[0][i];
+                    sum += alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i];
                }
                phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum);
            }
@ -378,7 +400,7 @@ public class UnifiedGenotyperEngine {
        }

        // create the genotypes
-        GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse);
+        GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse, myAlleles);

        // print out stats if we have a writer
        if ( verboseWriter != null && !limitedContext )
@ -396,31 +418,31 @@ public class UnifiedGenotyperEngine {

            // the overall lod
            VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
-            clearAFarray(log10AlleleFrequencyLikelihoods.get());
-            clearAFarray(log10AlleleFrequencyPosteriors.get());
-            afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
+            clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
+            clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
+            afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
            //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
-            double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
+            double overallLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
            //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);

            // the forward lod
            VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
-            clearAFarray(log10AlleleFrequencyLikelihoods.get());
-            clearAFarray(log10AlleleFrequencyPosteriors.get());
-            afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
+            clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
+            clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
+            afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
            //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
-            double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
-            double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
+            double forwardLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
+            double forwardLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
            //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);

            // the reverse lod
            VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
-            clearAFarray(log10AlleleFrequencyLikelihoods.get());
-            clearAFarray(log10AlleleFrequencyPosteriors.get());
-            afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
+            clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyLikelihoods);
+            clearAFarray(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors);
+            afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), alleleFrequencyCalculationResult.get());
            //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
-            double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
-            double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
+            double reverseLog10PofNull = alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][0];
+            double reverseLog10PofF = MathUtils.log10sumLog10(alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0], 1);
            //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);

            double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
@ -587,10 +609,10 @@ public class UnifiedGenotyperEngine {
            AFline.append(i + "/" + N + "\t");
            AFline.append(String.format("%.2f\t", ((float)i)/N));
            AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i]));
-            if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
+            if ( alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
                AFline.append("0.00000000\t");                
            else
-                AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i]));
+                AFline.append(String.format("%.8f\t", alleleFrequencyCalculationResult.get().log10AlleleFrequencyPosteriors[i]));
            AFline.append(String.format("%.8f\t", normalizedPosteriors[i]));
            verboseWriter.println(AFline.toString());
        }
@ -750,82 +772,85 @@ public class UnifiedGenotyperEngine {
    /**
     * @param vc            variant context with genotype likelihoods
     * @param allelesToUse  bit vector describing which alternate alleles from the vc are okay to use
+     * @param newAlleles    a list of the final new alleles to use
     *
     * @return genotypes
     */
-    public GenotypesContext assignGenotypes(VariantContext vc,
-                                            boolean[] allelesToUse) {
+    public GenotypesContext assignGenotypes(final VariantContext vc,
+                                            final boolean[] allelesToUse,
+                                            final List<Allele> newAlleles) {

+        // the no-called genotypes
        final GenotypesContext GLs = vc.getGenotypes();

+        // samples
        final List<String> sampleIndices = GLs.getSampleNamesOrderedByName();

+        // the new called genotypes to create
        final GenotypesContext calls = GenotypesContext.create();

+        // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward
+        final int numOriginalAltAlleles = allelesToUse.length;
+        final int numNewAltAlleles = newAlleles.size() - 1;
+        ArrayList<Integer> likelihoodIndexesToUse = null;
+
+        // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles,
+        // then we can keep the PLs as is; otherwise, we determine which ones to keep
+        if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) {
+            likelihoodIndexesToUse = new ArrayList<Integer>(30);
+            final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles];
+
+            for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) {
+                int[] alleles = PLcache[PLindex];
+                // consider this entry only if both of the alleles are good
+                if ( (alleles[0] == 0 || allelesToUse[alleles[0] - 1]) && (alleles[1] == 0 || allelesToUse[alleles[1] - 1]) )
+                    likelihoodIndexesToUse.add(PLindex);
+            }
+        }
+
+        // create the new genotypes
        for ( int k = GLs.size() - 1; k >= 0; k-- ) {
            final String sample = sampleIndices.get(k);
            final Genotype g = GLs.get(sample);
            if ( !g.hasLikelihoods() )
                continue;

-            final double[] likelihoods = g.getLikelihoods().getAsVector();
+            // create the new likelihoods array from the alleles we are allowed to use
+            final double[] originalLikelihoods = g.getLikelihoods().getAsVector();
+            double[] newLikelihoods;
+            if ( likelihoodIndexesToUse == null ) {
+                newLikelihoods = originalLikelihoods;
+            } else {
+                newLikelihoods = new double[likelihoodIndexesToUse.size()];
+                int newIndex = 0;
+                for ( int oldIndex : likelihoodIndexesToUse )
+                    newLikelihoods[newIndex++] = originalLikelihoods[oldIndex];

-            // if there is no mass on the likelihoods, then just no-call the sample
-            if ( MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL ) {
+                // might need to re-normalize
+                newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true);
+            }
+
+            // if there is no mass on the (new) likelihoods and we actually have alternate alleles, then just no-call the sample
+            if ( MathUtils.sum(newLikelihoods) > SUM_GL_THRESH_NOCALL ) {
                calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false));
                continue;
            }

-            // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods.
-            // so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD.
-
-            final int numAltAlleles = allelesToUse.length;
-
-            // start with the assumption that the ideal genotype is homozygous reference
-            Allele maxAllele1 = vc.getReference(), maxAllele2 = vc.getReference();
-            double maxLikelihoodSeen = likelihoods[0];
-            int indexOfMax = 0;
-
-            // keep track of some state
-            Allele firstAllele = vc.getReference();
-            int subtractor = numAltAlleles + 1;
-            int subtractionsMade = 0;
-
-            for ( int i = 1, PLindex = 1; i < likelihoods.length; i++, PLindex++ ) {
-                if ( PLindex == subtractor ) {
-                    firstAllele = vc.getAlternateAllele(subtractionsMade);
-                    PLindex -= subtractor;
-                    subtractor--;
-                    subtractionsMade++;
-
-                    // we can skip this allele if it's not usable
-                    if ( !allelesToUse[subtractionsMade-1] ) {
-                        i += subtractor - 1;
-                        PLindex += subtractor - 1;
-                        continue;
-                    }
-                }
-
-                // we don't care about the entry if we've already seen better
-                if ( likelihoods[i] <= maxLikelihoodSeen )
-                    continue;
-
-                // if it's usable then update the alleles
-                int alleleIndex = subtractionsMade + PLindex - 1;
-                if ( allelesToUse[alleleIndex] ) {
-                    maxAllele1 = firstAllele;
-                    maxAllele2 = vc.getAlternateAllele(alleleIndex);
-                    maxLikelihoodSeen = likelihoods[i];
-                    indexOfMax = i;
-                }
-            }
+            // find the genotype with maximum likelihoods
+            int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods);
+            int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex];

            ArrayList<Allele> myAlleles = new ArrayList<Allele>();
-            myAlleles.add(maxAllele1);
-            myAlleles.add(maxAllele2);
+            myAlleles.add(newAlleles.get(alleles[0]));
+            myAlleles.add(newAlleles.get(alleles[1]));

-            final double qual = GenotypeLikelihoods.getQualFromLikelihoods(indexOfMax, likelihoods);
-            calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false));
+            final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods);
+            Map<String, Object> attrs = new HashMap<String, Object>(g.getAttributes());
+            if ( numNewAltAlleles == 0 )
+                attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
+            else
+                attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods));
+            calls.add(new Genotype(sample, myAlleles, qual, null, attrs, false));
        }

        return calls;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java
@ -182,6 +182,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
                        nHomDerived++;
                    }

+                    break;
+                case MIXED:
                    break;
                default:
                    throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@ -175,7 +175,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
    /////////////////////////////
    // Debug Arguments
    /////////////////////////////
-    @Hidden
+    @Advanced
    @Argument(fullName = "trustAllPolymorphic", shortName = "allPoly", doc = "Trust that all the input training sets' unfiltered records contain only polymorphic sites to drastically speed up the computation.", required = false)
    protected Boolean TRUST_ALL_POLYMORPHIC = false;
    //@Hidden
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@ -1033,14 +1033,15 @@ public class MathUtils {
    public static final double JACOBIAN_LOG_TABLE_STEP = 0.1;
    public static final double INV_JACOBIAN_LOG_TABLE_STEP = 1.0/JACOBIAN_LOG_TABLE_STEP;
    public static final double MAX_JACOBIAN_TOLERANCE = 10.0;
-    private static final int MAXN = 10000;
+    private static final int MAXN = 11000;
+    private static final int LOG10_CACHE_SIZE = 4 * MAXN;  // we need to be able to go up to 2*(2N) when calculating some of the coefficients

    static {
-        log10Cache = new double[2*MAXN];
+        log10Cache = new double[LOG10_CACHE_SIZE];
 	    jacobianLogTable = new double[JACOBIAN_LOG_TABLE_SIZE];

        log10Cache[0] = Double.NEGATIVE_INFINITY;
-        for (int k=1; k < 2*MAXN; k++)
+        for (int k=1; k < LOG10_CACHE_SIZE; k++)
            log10Cache[k] = Math.log10(k);

    	for (int k=0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
--- a/public/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java
@ -388,6 +388,15 @@ public class Utils {
        return reallocate(pos, z);
    }

+    public static int countSetBits(boolean[] array) {
+        int counter = 0;
+        for ( int i = 0; i < array.length; i++ ) {
+            if ( array[i] )
+                counter++;
+        }
+        return counter;
+    }
+
    /**
     * Returns new (reallocated) integer array of the specified size, with content
     * of the original array <code>orig</code> copied into it. If <code>newSize</code> is
@ -645,4 +654,18 @@ public class Utils {
            // handle exception
        }
    }
+
+
+    public static byte [] arrayFromArrayWithLength(byte[] array, int length) {
+        byte [] output = new byte[length];
+        for (int j = 0; j < length; j++)
+            output[j] = array[(j % array.length)];
+        return output;
+    }
+
+    public static void fillArrayWithByte(byte[] array, byte value) {
+        for (int i=0; i<array.length; i++)
+            array[i] = value;
+    }
+
 }
--- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ClippingOp.java
@ -247,7 +247,7 @@ public class ClippingOp {
        return newCigar;
    }

-    @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1", "!read.getReadUnmappedFlag()"})
+    @Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
    private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
        if (start == 0 && stop == read.getReadLength() - 1)
            return new GATKSAMRecord(read.getHeader());
@ -373,6 +373,10 @@ public class ClippingOp {
            while(cigarElementIterator.hasNext()) {
                cigarElement = cigarElementIterator.next();
                alignmentShift += calculateHardClippingAlignmentShift(cigarElement, cigarElement.getLength());
+
+                // if the read had a HardClip operator in the end, combine it with the Hard Clip we are adding
+                if (cigarElement.getOperator() == CigarOperator.HARD_CLIP)
+                    totalHardClipCount += cigarElement.getLength();
            }
            newCigar.add(new CigarElement(totalHardClipCount + alignmentShift, CigarOperator.HARD_CLIP));
        }
--- a/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java
+++ b/public/java/src/org/broadinstitute/sting/utils/clipreads/ReadClipper.java
@ -58,6 +58,7 @@ public class ReadClipper {
        return hardClipByReferenceCoordinates(refStart, -1);
    }

+    @Requires("!read.getReadUnmappedFlag()")
    protected GATKSAMRecord hardClipByReferenceCoordinates(int refStart, int refStop) {
        int start = (refStart < 0) ? 0 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStart, ReadUtils.ClippingTail.RIGHT_TAIL);
        int stop =  (refStop  < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
@ -168,7 +169,14 @@ public class ReadClipper {
            try {
                GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
                for (ClippingOp op : getOps()) {
-                    clippedRead = op.apply(algorithm, clippedRead);
+                    //check if the clipped read can still be clipped in the range requested
+                    if (op.start < clippedRead.getReadLength()) {
+                        ClippingOp fixedOperation = op;
+                        if (op.stop >= clippedRead.getReadLength())
+                            fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1);
+
+                        clippedRead = fixedOperation.apply(algorithm, clippedRead);
+                    }
                }
                wasClipped = true;
                ops.clear();
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
@ -184,7 +184,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
     * @return a feature, (not guaranteed complete) that has the correct start and stop
     */
    public Feature decodeLoc(String line) {
-        lineNo++;

        // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line
        if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null;
@ -279,6 +278,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
        builder.source(getName());

        // increment the line count
+        // TODO -- because of the way the engine utilizes Tribble, we can parse a line multiple times (especially when
+        // TODO --   the first record is far along the contig) and the line counter can get out of sync
        lineNo++;

        // parse out the required fields
@ -594,6 +595,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec {
                if ( a.isSymbolic() )
                    continue;

+                // we need to ensure that we don't reverse clip out all of the bases from an allele because we then will have the wrong
+                // position set for the VariantContext (although it's okay to forward clip it all out, because the position will be fine).
+                if ( a.length() - clipping == 0 )
+                    return clipping - 1;
+
                if ( a.length() - clipping <= forwardClipping || a.length() - forwardClipping == 0 )
                    stillClipping = false;
                else if ( ref.length() == clipping )
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java
@ -120,6 +120,8 @@ public class VCF3Codec extends AbstractVCFCodec {
            genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS];

        int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
+        if ( nParts != genotypeParts.length )
+            generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);

        ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);

--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java
@ -147,6 +147,8 @@ public class VCFCodec extends AbstractVCFCodec {
            genotypeParts = new String[header.getColumnCount() - NUM_STANDARD_FIELDS];

        int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
+        if ( nParts != genotypeParts.length )
+            generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);

        ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);

--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@ -184,11 +184,11 @@ public class UserException extends ReviewedStingException {

    public static class MalformedVCF extends UserException {
        public MalformedVCF(String message, String line) {
-            super(String.format("The provided VCF file is malformed at line %s: %s", line, message));
+            super(String.format("The provided VCF file is malformed at approximately line %s: %s", line, message));
        }

        public MalformedVCF(String message, int lineNo) {
-            super(String.format("The provided VCF file is malformed at line number %d: %s", lineNo, message));
+            super(String.format("The provided VCF file is malformed at approximately line number %d: %s", lineNo, message));
        }
    }

--- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
@ -200,6 +200,48 @@ public class ArtificialSAMUtils {
        return rec;
    }

+    /**
+     * Create an artificial read based on the parameters
+     *
+     * @param header         the SAM header to associate the read with
+     * @param name           the name of the read
+     * @param refIndex       the reference index, i.e. what chromosome to associate it with
+     * @param alignmentStart where to start the alignment
+     * @param bases          the sequence of the read
+     * @param qual           the qualities of the read
+     * @param cigar          the cigar string of the read
+     *
+     * @return the artificial read
+     */
+    public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual, String cigar ) {
+        GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases, qual);
+        rec.setCigarString(cigar);
+        return rec;
+    }
+
+    /**
+     * Create an artificial read with the following default parameters :
+     *    header:
+     *     numberOfChromosomes = 1
+     *     startingChromosome = 1
+     *     chromosomeSize = 1000000
+     *    read:
+     *     name = "default_read"
+     *     refIndex = 0
+     *     alignmentStart = 1
+     *
+     * @param bases          the sequence of the read
+     * @param qual           the qualities of the read
+     * @param cigar          the cigar string of the read
+     *
+     * @return the artificial read
+     */
+    public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
+        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+        return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 1, bases, qual, cigar);
+    }
+
+
    public final static List<GATKSAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
        GATKSAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
        GATKSAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java
@ -910,4 +910,9 @@ public class ReadUtils {
        return comp.compare(read1, read2);
    }

+    // TEST UTILITIES
+
+
+
+
 }
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java
@ -410,6 +410,12 @@ public class GenotypesContext implements List<Genotype> {
        return getGenotypes().get(i);
    }

+    /**
+     * Gets sample associated with this sampleName, or null if none is found
+     *
+     * @param sampleName
+     * @return
+     */
    public Genotype get(final String sampleName) {
        Integer offset = getSampleI(sampleName);
        return offset == null ? null : getGenotypes().get(offset);
@ -648,16 +654,15 @@ public class GenotypesContext implements List<Genotype> {
    @Ensures("result != null")
    public GenotypesContext subsetToSamples( final Set<String> samples ) {
        final int nSamples = samples.size();
-        final int nGenotypes = size();

-        if ( nSamples == nGenotypes )
-            return this;
-        else if ( nSamples == 0 )
+        if ( nSamples == 0 )
            return NO_GENOTYPES;
        else { // nGenotypes < nSamples
            final GenotypesContext subset = create(samples.size());
            for ( final String sample : samples ) {
-                subset.add(get(sample));
+                final Genotype g = get(sample);
+                if ( g != null )
+                    subset.add(g);
            }
            return subset;
        }
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java
@ -83,21 +83,20 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
    @Test(dataProvider = "getGLs")
    public void testGLs(GetGLsTest cfg) {

-        final double[][] log10AlleleFrequencyLikelihoods = new double[2][2*numSamples+1];
-        final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1];
+        final AlleleFrequencyCalculationResult result = new AlleleFrequencyCalculationResult(2, 2*numSamples);
        for ( int i = 0; i < 2; i++ ) {
            for ( int j = 0; j < 2*numSamples+1; j++ ) {
-                log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
-                log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
+                result.log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
+                result.log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
            }
        }

-        ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
+        ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, result, false);

        int nameIndex = 1;
        for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
            int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1));
-            int calculatedAlleleCount = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors[allele]);
+            int calculatedAlleleCount = MathUtils.maxElementIndex(result.log10AlleleFrequencyPosteriors[allele]);
            Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount);
        }
    }
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@ -129,8 +129,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
    public void testOutputParameter() {
        HashMap<String, String> e = new HashMap<String, String>();
        e.put( "-sites_only", "44f3b5b40e6ad44486cddfdb7e0bfcd8" );
-        e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "94e53320f14c5ff29d62f68d36b46fcd" );
-        e.put( "--output_mode EMIT_ALL_SITES", "73ad1cc41786b12c5f0e6f3e9ec2b728" );
+        e.put( "--output_mode EMIT_ALL_CONFIDENT_SITES", "274bd9d1b9c7857690fa5f0228ffc6d7" );
+        e.put( "--output_mode EMIT_ALL_SITES", "594c6d3c48bbc73289de7727d768644d" );

        for ( Map.Entry<String, String> entry : e.entrySet() ) {
            WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
--- a/public/java/test/org/broadinstitute/sting/utils/clipreads/CigarStringTestPair.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/CigarStringTestPair.java
@ -1,18 +0,0 @@
-package org.broadinstitute.sting.utils.clipreads;
-
-/**
- * Created by IntelliJ IDEA.
- * User: roger
- * Date: 11/29/11
- * Time: 4:53 PM
- * To change this template use File | Settings | File Templates.
- */
-public class CigarStringTestPair {
-    public String toTest;
-    public String expected;
-
-    public CigarStringTestPair(String ToTest, String Expected) {
-        this.toTest = ToTest;
-        this.expected = Expected;
-    }
-}
--- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClipReadsTestUtils.java
@ -2,8 +2,8 @@ package org.broadinstitute.sting.utils.clipreads;

 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
-import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.TextCigarCodec;
+import net.sf.samtools.CigarOperator;
+import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.testng.Assert;
@ -21,165 +21,159 @@ public class ClipReadsTestUtils {
    //Should contain all the utils needed for tests to mass produce
    //reads, cigars, and other needed classes

-    final static String BASES = "ACTG";
-    final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
+    final static byte [] BASES = {'A', 'C', 'T', 'G'};
+    final static byte [] QUALS = {2, 15, 25, 30};
+    final static String CIGAR = "4M";
+    final static CigarElement[] cigarElements = { new CigarElement(1, CigarOperator.HARD_CLIP),
+                                                  new CigarElement(1, CigarOperator.SOFT_CLIP),
+                                                  new CigarElement(1, CigarOperator.INSERTION),
+                                                  new CigarElement(1, CigarOperator.DELETION),
+                                                  new CigarElement(1, CigarOperator.MATCH_OR_MISMATCH)};

-    public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
-        // Because quals to char start at 33 for visibility
-        baseQuals = subtractToArray(baseQuals, 33);
-
-        Assert.assertEquals(read.getReadBases(), readBases);
-        Assert.assertEquals(read.getBaseQualities(), baseQuals);
-        Assert.assertEquals(read.getCigarString(), cigar);
-    }
-
-    public static void testCigar(GATKSAMRecord read, String cigar) {
-        Assert.assertEquals(read.getCigarString(), cigar);
-    }
-
-    public static void testBaseQual(GATKSAMRecord read, byte[] readBases, byte[] baseQuals) {
-        // Because quals to chars start at 33 for visibility
-        baseQuals = subtractToArray(baseQuals, 33);
-
-        if (readBases.length > 0 && baseQuals.length > 0) {
-            Assert.assertEquals(read.getReadBases(), readBases);
-            Assert.assertEquals(read.getBaseQualities(), baseQuals);
-        } else
-            Assert.assertTrue(read.isEmpty());
-    }
-
-    private static byte[] subtractToArray(byte[] array, int n) {
-        if (array == null)
-            return null;
-
-        byte[] output = new byte[array.length];
-
-        for (int i = 0; i < array.length; i++)
-            output[i] = (byte) (array[i] - n);
-
-        return output;
-    }
-
-    // What the test read looks like
-    // Ref:    10 11 12 13 14 15 16 17
-    // Read:   0  1  2  3  -  -  -  -
-    // --------------------------------
-    // Bases:  A  C  T  G  -  -  -  -
-    // Quals:  !  +  5  ?  -  -  -  -
-
-    public static GATKSAMRecord makeRead() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
-        GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, BASES.length());
-        output.setReadBases(new String(BASES).getBytes());
-        output.setBaseQualityString(new String(QUALS));
-
-        return output;
-    }

    public static GATKSAMRecord makeReadFromCigar(Cigar cigar) {
-
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
-        GATKSAMRecord output = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 10, cigar.getReadLength());
-        output.setReadBases(cycleString(BASES, cigar.getReadLength()).getBytes());
-        output.setBaseQualityString(cycleString(QUALS, cigar.getReadLength()));
-        output.setCigar(cigar);
-
-        return output;
+        return ArtificialSAMUtils.createArtificialRead(Utils.arrayFromArrayWithLength(BASES, cigar.getReadLength()), Utils.arrayFromArrayWithLength(QUALS, cigar.getReadLength()), cigar.toString());
    }

-    private static String cycleString(String string, int length) {
-        String output = "";
-        int cycles = (length / string.length()) + 1;
+    /**
+     * This function generates every valid permutation of cigar strings with a given length.
+     *
+     * A valid cigar object obeys the following rules:
+     *  - No Hard/Soft clips in the middle of the read
+     *  - No deletions in the beginning / end of the read
+     *  - No repeated adjacent element (e.g. 1M2M -> this should be 3M)
+     *
+     * @param maximumLength the maximum number of elements in the cigar
+     * @return a list with all valid Cigar objects
+     */
+    public static List<Cigar> generateCigarList(int maximumLength) {
+        int numCigarElements = cigarElements.length;
+        LinkedList<Cigar> cigarList = new LinkedList<Cigar>();
+        byte [] cigarCombination = new byte[maximumLength];

-        for (int i = 1; i < cycles; i++)
-            output += string;
+        Utils.fillArrayWithByte(cigarCombination, (byte) 0);               // we start off with all 0's in the combination array.
+        int currentIndex = 0;
+        while (true) {
+            Cigar cigar = createCigarFromCombination(cigarCombination);    // create the cigar
+            cigar = combineAdjacentCigarElements(cigar);                   // combine adjacent elements
+            if (isCigarValid(cigar)) {                                     // check if it's valid
+                cigarList.add(cigar);                                      // add it
+            }

-        for (int j = 0; output.length() < length; j++)
-            output += string.charAt(j % string.length());
+            boolean currentIndexChanged = false;
+            while (currentIndex < maximumLength && cigarCombination[currentIndex] == numCigarElements - 1) {
+                currentIndex++;                                            // find the next index to increment
+                currentIndexChanged = true;                                // keep track of the fact that we have changed indices!
+            }

-        return output;
-    }
+            if (currentIndex == maximumLength)                             // if we hit the end of the array, we're done.
+                break;

-    public static Set<Cigar> generateCigars() {
+            cigarCombination[currentIndex]++;                              // otherwise advance the current index

-        // This function generates every permutation of cigar strings we need.
-
-        LinkedHashSet<Cigar> output = new LinkedHashSet<Cigar>();
-
-        List<Cigar> clippingOptionsStart = new LinkedList<Cigar>();
-        clippingOptionsStart.add(new Cigar());
-        clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H1S"));
-        clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1S"));
-        clippingOptionsStart.add(TextCigarCodec.getSingleton().decode("1H"));
-
-        LinkedList<Cigar> clippingOptionsEnd = new LinkedList<Cigar>();
-        clippingOptionsEnd.add(new Cigar());
-        clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S1H"));
-        clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1S"));
-        clippingOptionsEnd.add(TextCigarCodec.getSingleton().decode("1H"));
-
-
-        LinkedList<Cigar> indelOptions1 = new LinkedList<Cigar>();
-        indelOptions1.add(new Cigar());
-        //indelOptions1.add( TextCigarCodec.getSingleton().decode("1I1D"));
-        //indelOptions1.add( TextCigarCodec.getSingleton().decode("1D1I") );
-        indelOptions1.add(TextCigarCodec.getSingleton().decode("1I"));
-        indelOptions1.add(TextCigarCodec.getSingleton().decode("1D"));
-
-        LinkedList<Cigar> indelOptions2 = new LinkedList<Cigar>();
-        indelOptions2.add(new Cigar());
-        indelOptions2.add(TextCigarCodec.getSingleton().decode("1I"));
-        indelOptions2.add(null);
-
-
-        // Start With M as base CigarElements, M,
-
-        LinkedList<Cigar> base = new LinkedList<Cigar>();
-        base.add(TextCigarCodec.getSingleton().decode("1M"));
-        base.add(TextCigarCodec.getSingleton().decode("5M"));
-        base.add(TextCigarCodec.getSingleton().decode("25M"));
-        // Should indel be added as a base?
-
-        // Nested loops W00t!
-        for (Cigar Base : base) {
-            for (Cigar indelStart : indelOptions1) {
-                for (Cigar indelEnd : indelOptions2) {
-                    for (Cigar clipStart : clippingOptionsStart) {
-                        for (Cigar clipEnd : clippingOptionsEnd) {
-                            // Create a list of Cigar Elements and construct Cigar
-                            List<CigarElement> CigarBuilder = new ArrayList<CigarElement>();
-                            // add starting clipping (H/S)
-                            CigarBuilder.addAll(clipStart.getCigarElements());
-                            // add first base (M)
-                            CigarBuilder.addAll(Base.getCigarElements());
-                            // add first indel
-                            CigarBuilder.addAll(indelStart.getCigarElements());
-                            // add second base (M)
-                            CigarBuilder.addAll(Base.getCigarElements());
-                            // add another indel or nothing (M)
-                            if (indelEnd != null)
-                                CigarBuilder.addAll(indelEnd.getCigarElements());
-                            // add final clipping (S/H)
-                            CigarBuilder.addAll(clipEnd.getCigarElements());
-
-
-                            output.add(new Cigar(removeConsecutiveElements(CigarBuilder)));
-
-                        }
-                    }
-                }
+            if (currentIndexChanged) {                                     // if we have changed index, then...
+                for (int i = 0; i < currentIndex; i++)
+                    cigarCombination[i] = 0;                               // reset everything from 0->currentIndex
+                currentIndex = 0;                                          // go back to the first index
            }
        }

-        return output;
+        return cigarList;
    }

-    private static List<CigarElement> removeConsecutiveElements(List<CigarElement> cigarBuilder) {
-        LinkedList<CigarElement> output = new LinkedList<CigarElement>();
-        for (CigarElement E : cigarBuilder) {
-            if (output.isEmpty() || output.getLast().getOperator() != E.getOperator())
-                output.add(E);
+    private static boolean isCigarValid(Cigar cigar) {
+        if (cigar.isValid(null, -1) == null) {                                          // This should take care of most invalid Cigar Strings (picard's "exhaustive" implementation)
+
+            Stack<CigarElement> cigarElementStack = new Stack<CigarElement>();          // Stack to invert cigar string to find ending operator
+            CigarOperator startingOp = null;
+            CigarOperator endingOp = null;
+
+            // check if it doesn't start with deletions
+            boolean readHasStarted = false;                                             // search the list of elements for the starting operator
+            for (CigarElement cigarElement : cigar.getCigarElements()) {
+                if (!readHasStarted) {
+                    if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
+                        readHasStarted = true;
+                        startingOp = cigarElement.getOperator();
+                    }
+                }
+                cigarElementStack.push(cigarElement);
+            }
+
+            readHasStarted = false;                                                     // search the inverted list of elements (stack) for the stopping operator
+            while (!cigarElementStack.empty()) {
+                CigarElement cigarElement = cigarElementStack.pop();
+                if (cigarElement.getOperator() != CigarOperator.SOFT_CLIP && cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
+                    readHasStarted = true;
+                    endingOp = cigarElement.getOperator();
+                    break;
+                }
+            }
+
+            if (startingOp != CigarOperator.DELETION && endingOp != CigarOperator.DELETION && startingOp != CigarOperator.INSERTION && endingOp != CigarOperator.INSERTION)
+                return true;                                                            // we don't accept reads starting or ending in deletions (add any other constraint here)
        }
-        return output;
+
+        return false;
    }
+
+    private static Cigar createCigarFromCombination(byte[] cigarCombination) {
+        Cigar cigar = new Cigar();
+        for (byte i : cigarCombination) {
+            cigar.add(cigarElements[i]);
+        }
+        return cigar;
+    }
+
+
+    /**
+     * Combines equal adjacent elements of a Cigar object
+     *
+     * @param rawCigar the cigar object
+     * @return a combined cigar object
+     */
+    private static Cigar combineAdjacentCigarElements(Cigar rawCigar) {
+        Cigar combinedCigar = new Cigar();
+        CigarElement lastElement = null;
+        int lastElementLength = 0;
+        for (CigarElement cigarElement : rawCigar.getCigarElements()) {
+            if (lastElement != null && lastElement.getOperator() == cigarElement.getOperator())
+                lastElementLength += cigarElement.getLength();
+            else
+            {
+                if (lastElement != null)
+                    combinedCigar.add(new CigarElement(lastElementLength, lastElement.getOperator()));
+
+                lastElement = cigarElement;
+                lastElementLength = cigarElement.getLength();
+            }
+        }
+        if (lastElement != null)
+            combinedCigar.add(new CigarElement(lastElementLength, lastElement.getOperator()));
+
+        return combinedCigar;
+    }
+
+    public static GATKSAMRecord makeRead() {
+        return ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
+    }
+
+    /**
+     * Asserts that the two reads have the same bases, qualities and cigar strings
+     *
+     * @param actual the calculated read
+     * @param expected the expected read
+     */
+    public static void assertEqualReads(GATKSAMRecord actual, GATKSAMRecord expected) {
+        // If they're both not empty, test their contents
+        if(!actual.isEmpty() && !expected.isEmpty()) {
+            Assert.assertEquals(actual.getReadBases(), expected.getReadBases());
+            Assert.assertEquals(actual.getBaseQualities(), expected.getBaseQualities());
+            Assert.assertEquals(actual.getCigarString(), expected.getCigarString());
+        }
+        // Otherwise test if they're both empty
+        else
+            Assert.assertEquals(actual.isEmpty(), expected.isEmpty());
+    }
+
 }
--- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ClippingOpUnitTest.java
@ -5,8 +5,6 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.testng.annotations.BeforeTest;
 import org.testng.annotations.Test;

-import java.util.LinkedList;
-import java.util.List;

 /**
 * Created by IntelliJ IDEA.
@ -27,43 +25,43 @@ public class ClippingOpUnitTest extends BaseTest {

    @Test
    private void testHardClip() {
-        List<TestParameter> testList = new LinkedList<TestParameter>();
-        testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
-        testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
-        testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
-        testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
-        testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
-        testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
-
-        for (TestParameter p : testList) {
-            init();
-            clippingOp = new ClippingOp(p.inputStart, p.inputStop);
-            logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
-            ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
-                    ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
-                    ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
-                    p.cigar);
-        }
+//        List<TestParameter> testList = new LinkedList<TestParameter>();
+//        testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
+//        testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
+//        testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
+//        testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
+//        testList.add(new TestParameter(0, 2, 3, 4, "3H1M"));//clip 3 bases at start
+//        testList.add(new TestParameter(1, 3, 0, 1, "1M3H"));//clip 3 bases at end
+//
+//        for (TestParameter p : testList) {
+//            init();
+//            clippingOp = new ClippingOp(p.inputStart, p.inputStop);
+//            logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.substringStart + "," + p.substringStop + "," + p.cigar);
+//            ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.HARDCLIP_BASES, read),
+//                    ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
+//                    ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
+//                    p.cigar);
+//        }

    }

    @Test
    private void testSoftClip() {
-        List<TestParameter> testList = new LinkedList<TestParameter>();
-        testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
-        testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
-        testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
-        testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
-        testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
-        testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
-
-        for (TestParameter p : testList) {
-            init();
-            clippingOp = new ClippingOp(p.inputStart, p.inputStop);
-            logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
-            ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
-                    ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
-        }
+//        List<TestParameter> testList = new LinkedList<TestParameter>();
+//        testList.add(new TestParameter(0, 0, -1, -1, "1S3M"));//clip 1 base at start
+//        testList.add(new TestParameter(3, 3, -1, -1, "3M1S"));//clip 1 base at end
+//        testList.add(new TestParameter(0, 1, -1, -1, "2S2M"));//clip 2 bases at start
+//        testList.add(new TestParameter(2, 3, -1, -1, "2M2S"));//clip 2 bases at end
+//        testList.add(new TestParameter(0, 2, -1, -1, "3S1M"));//clip 3 bases at start
+//        testList.add(new TestParameter(1, 3, -1, -1, "1M3S"));//clip 3 bases at end
+//
+//        for (TestParameter p : testList) {
+//            init();
+//            clippingOp = new ClippingOp(p.inputStart, p.inputStop);
+//            logger.warn("Testing Parameters: " + p.inputStart + "," + p.inputStop + "," + p.cigar);
+//            ClipReadsTestUtils.testBaseQualCigar(clippingOp.apply(ClippingRepresentation.SOFTCLIP_BASES, read),
+//                    ClipReadsTestUtils.BASES.getBytes(), ClipReadsTestUtils.QUALS.getBytes(), p.cigar);
+//        }

    }
 }
--- a/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/ReadClipperUnitTest.java
@ -28,15 +28,15 @@ package org.broadinstitute.sting.utils.clipreads;
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
-import net.sf.samtools.TextCigarCodec;
 import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.broadinstitute.sting.utils.sam.ReadUtils;
 import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.BeforeMethod;
 import org.testng.annotations.Test;

-import java.util.LinkedList;
 import java.util.List;

 /**
@ -52,157 +52,36 @@ public class ReadClipperUnitTest extends BaseTest {

    // TODO: add indels to all test cases

-    ReadClipper readClipper;
+    List<Cigar> cigarList;
+    int maximumCigarSize = 10;

-    @BeforeMethod
+    @BeforeClass
    public void init() {
-        readClipper = new ReadClipper(ClipReadsTestUtils.makeRead());
+        cigarList = ClipReadsTestUtils.generateCigarList(maximumCigarSize);
    }

    @Test(enabled = true)
    public void testHardClipBothEndsByReferenceCoordinates() {
-
        logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
-        //int debug = 1;
-        //Clip whole read
-        Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(10, 10), new GATKSAMRecord(readClipper.read.getHeader()));

-        //clip 1 base
-        ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipBothEndsByReferenceCoordinates(10, 13),
-                ClipReadsTestUtils.BASES.substring(1, 3).getBytes(), ClipReadsTestUtils.QUALS.substring(1, 3).getBytes(),
-                "1H2M1H");
-
-        List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
-        cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I99M1H"));
-        //cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1H"));
-        cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1H"));
-        cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1H"));
-
-        for (CigarStringTestPair pair : cigarStringTestPairs) {
-            readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
-            ClipReadsTestUtils.testCigar(readClipper.hardClipBothEndsByReferenceCoordinates(
-                    ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
-                    ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read)),
-                    pair.expected);
-        }
-        /*
-        for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
-            // The read has to be long enough to clip one base from each side
-            // This also filters a lot of cigars
-            if ( cigar.getReadLength() > 26 ) {
-                readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
-                System.out.println( "Testing Cigar: "+cigar.toString() ) ;
-                //cigar length reference plus soft clip
-
-                ClipReadsTestUtils.testBaseQual(
-                        readClipper.hardClipBothEndsByReferenceCoordinates(
-                                                              ReadUtils.getRefCoordSoftUnclippedStart(readClipper.read),
-                                                              ReadUtils.getRefCoordSoftUnclippedEnd(readClipper.read) ),
-                        readClipper.read.getReadString().substring(1, (cigar.getReadLength() - 1)).getBytes(),
-                        readClipper.read.getBaseQualityString().substring(1, (cigar.getReadLength() - 1)).getBytes());
-            }
-        }
-        */
    }

    @Test(enabled = true)
    public void testHardClipByReadCoordinates() {
-
        logger.warn("Executing testHardClipByReadCoordinates");

-        //Clip whole read
-        Assert.assertEquals(readClipper.hardClipByReadCoordinates(0, 3), new GATKSAMRecord(readClipper.read.getHeader()));
-
-        List<TestParameter> testList = new LinkedList<TestParameter>();
-        testList.add(new TestParameter(0, 0, 1, 4, "1H3M"));//clip 1 base at start
-        testList.add(new TestParameter(3, 3, 0, 3, "3M1H"));//clip 1 base at end
-        testList.add(new TestParameter(0, 1, 2, 4, "2H2M"));//clip 2 bases at start
-        testList.add(new TestParameter(2, 3, 0, 2, "2M2H"));//clip 2 bases at end
-
-        for (TestParameter p : testList) {
-            init();
-            //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
-            ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
-                    ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
-                    ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
-                    p.cigar);
-        }
-
    }

    @Test(enabled = true)
    public void testHardClipByReferenceCoordinates() {
        logger.warn("Executing testHardClipByReferenceCoordinates");
-        //logger.warn(debug);
-        //Clip whole read
-        Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(10, 13), new GATKSAMRecord(readClipper.read.getHeader()));

-        List<TestParameter> testList = new LinkedList<TestParameter>();
-        testList.add(new TestParameter(-1, 10, 1, 4, "1H3M"));//clip 1 base at start
-        testList.add(new TestParameter(13, -1, 0, 3, "3M1H"));//clip 1 base at end
-        testList.add(new TestParameter(-1, 11, 2, 4, "2H2M"));//clip 2 bases at start
-        testList.add(new TestParameter(12, -1, 0, 2, "2M2H"));//clip 2 bases at end
-
-        for (TestParameter p : testList) {
-            init();
-            //logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
-            ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinates(p.inputStart, p.inputStop),
-                    ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
-                    ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
-                    p.cigar);
-        }
-
-        List<CigarStringTestPair> cigarStringTestPairs = new LinkedList<CigarStringTestPair>();
-        cigarStringTestPairs.add(new CigarStringTestPair("5M1D1M2I4M5I6M1D3M2I100M", "1H4M1D1M2I4M5I6M1D3M2I100M"));
-        //cigarStringTestPairs.add( new CigarStringTestPair("5M1I1M2I1M","1H4M1I1M2I1M"));
-        cigarStringTestPairs.add(new CigarStringTestPair("1S1M1I1M1I1M1I1M1I1M1I1M1S", "1H1M1I1M1I1M1I1M1I1M1I1M1S"));
-        cigarStringTestPairs.add(new CigarStringTestPair("1S1M1D1M1D1M1D1M1D1M1D1M1S", "1H1M1D1M1D1M1D1M1D1M1D1M1S"));
-
-        //Clips only first base
-        for (CigarStringTestPair pair : cigarStringTestPairs) {
-            readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(TextCigarCodec.getSingleton().decode(pair.toTest)));
-            ClipReadsTestUtils.testCigar(readClipper.hardClipByReadCoordinates(0, 0), pair.expected);
-        }
-        /*
-        for ( Cigar cigar: ClipReadsTestUtils.generateCigars() ) {
-            // The read has to be long enough to clip one base
-            // This also filters a lot of cigars
-            if ( cigar.getReadLength() > 26 ) {
-                readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar( cigar ));
-                System.out.println( "Testing Cigar: "+cigar.toString() ) ;
-                //cigar length reference plus soft clip
-
-                // Clip first read
-                ClipReadsTestUtils.testBaseQual(
-                        readClipper.hardClipByReadCoordinates(0,0),
-                        readClipper.read.getReadString().substring(1, cigar.getReadLength()).getBytes(),
-                        readClipper.read.getBaseQualityString().substring(1, cigar.getReadLength()).getBytes());
-            }
-        }
-        */
    }

    @Test(enabled = true)
    public void testHardClipByReferenceCoordinatesLeftTail() {
-        init();
        logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");

-        //Clip whole read
-        Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(13), new GATKSAMRecord(readClipper.read.getHeader()));
-
-        List<TestParameter> testList = new LinkedList<TestParameter>();
-        testList.add(new TestParameter(10, -1, 1, 4, "1H3M"));//clip 1 base at start
-        testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
-
-        for (TestParameter p : testList) {
-            init();
-            //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
-            ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
-                    ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
-                    ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
-                    p.cigar);
-        }
-
    }

    @Test(enabled = true)
@ -210,135 +89,154 @@ public class ReadClipperUnitTest extends BaseTest {
        init();
        logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");

-        //Clip whole read
-        Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(10), new GATKSAMRecord(readClipper.read.getHeader()));
-
-        List<TestParameter> testList = new LinkedList<TestParameter>();
-        testList.add(new TestParameter(-1, 13, 0, 3, "3M1H"));//clip 1 base at end
-        testList.add(new TestParameter(-1, 12, 0, 2, "2M2H"));//clip 2 bases at end
-
-        for (TestParameter p : testList) {
-            init();
-            //logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
-            ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
-                    ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
-                    ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
-                    p.cigar);
-        }
-
    }

    @Test(enabled = true)
    public void testHardClipLowQualEnds() {
-        // Needs a thorough redesign
-        logger.warn("Executing testHardClipByReferenceCoordinates");
+        logger.warn("Executing testHardClipLowQualEnds");

-        //Clip whole read
-        Assert.assertEquals(readClipper.hardClipLowQualEnds((byte) 64), new GATKSAMRecord(readClipper.read.getHeader()));
+        final byte LOW_QUAL = 2;
+        final byte HIGH_QUAL = 30;

-        List<TestParameter> testList = new LinkedList<TestParameter>();
-        testList.add(new TestParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
-        testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
+        // create a read for every cigar permutation
+        for (Cigar cigar : cigarList) {
+            GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
+            int readLength = read.getReadLength();
+            byte [] quals = new byte[readLength];

-        for (TestParameter p : testList) {
-            init();
-            //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
-            ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart),
-                    ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
-                    ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
-                    p.cigar);
+            for (int nLowQualBases = 0; nLowQualBases < readLength; nLowQualBases++) {
+
+                // create a read with nLowQualBases in the left tail
+                Utils.fillArrayWithByte(quals, HIGH_QUAL);
+                for (int addLeft = 0; addLeft < nLowQualBases; addLeft++)
+                    quals[addLeft] = LOW_QUAL;
+                read.setBaseQualities(quals);
+                GATKSAMRecord clipLeft = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
+
+                // Tests
+
+                // Make sure the low qualities are gone
+                testNoLowQualBases(clipLeft,  LOW_QUAL);
+
+                // Can't run this test with the current contract of no hanging insertions
+                //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipLeft.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipLeft.getCigarString()));
+
+                // create a read with nLowQualBases in the right tail
+                Utils.fillArrayWithByte(quals, HIGH_QUAL);
+                for (int addRight = 0; addRight < nLowQualBases; addRight++)
+                    quals[readLength - addRight - 1] = LOW_QUAL;
+                read.setBaseQualities(quals);
+                GATKSAMRecord clipRight = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
+
+                // Tests
+
+                // Make sure the low qualities are gone
+                testNoLowQualBases(clipRight, LOW_QUAL);
+
+                // Make sure we haven't clipped any high quals -- Can't run this test with the current contract of no hanging insertions
+                //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - nLowQualBases, read.getCigarString(), clipRight.getCigarString()));
+
+                // create a read with nLowQualBases in the both tails
+                if (nLowQualBases <= readLength/2) {
+                    Utils.fillArrayWithByte(quals, HIGH_QUAL);
+                    for (int addBoth = 0; addBoth < nLowQualBases; addBoth++) {
+                        quals[addBoth] = LOW_QUAL;
+                        quals[readLength - addBoth - 1] = LOW_QUAL;
+                    }
+                    read.setBaseQualities(quals);
+                    GATKSAMRecord clipBoth = (new ReadClipper(read)).hardClipLowQualEnds(LOW_QUAL);
+
+                    // Tests
+
+                    // Make sure the low qualities are gone
+                    testNoLowQualBases(clipBoth,  LOW_QUAL);
+
+                    // Can't run this test with the current contract of no hanging insertions
+                    //Assert.assertEquals(clipLeft.getReadLength(), readLength - nLowQualBases, String.format("Clipped read size (%d) is different than the number high qual bases (%d) -- Cigars: %s -> %s", clipRight.getReadLength(), readLength - (2*nLowQualBases), read.getCigarString(), clipBoth.getCigarString()));
+                }
+            }
+//            logger.warn(String.format("Testing %s for all combinations of low/high qual... PASSED", read.getCigarString()));
        }
-        /*      todo find a better way to test lowqual tail clipping on both sides
-        // Reverse Quals sequence
-        readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33

-        testList = new LinkedList<testParameter>();
-        testList.add(new testParameter(1,-1,0,3,"3M1H"));//clip 1 base at end
-        testList.add(new testParameter(11,-1,0,2,"2M2H"));//clip 2 bases at end

-        for ( testParameter p : testList ) {
-            init();
-            readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
-            //logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
-            testBaseQualCigar( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
-                    BASES.substring(p.substringStart,p.substringStop).getBytes(),
-                    QUALS.substring(p.substringStart,p.substringStop),
-                    p.cigar );
-        }
-        */
+
+
+
+        // ONE OFF Testing clipping that ends inside an insertion
+        final byte[] BASES = {'A','C','G','T','A','C','G','T'};
+        final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
+        final String CIGAR = "1S1M5I1S";
+
+        final byte[] CLIPPED_BASES = {};
+        final byte[] CLIPPED_QUALS = {};
+        final String CLIPPED_CIGAR = "";
+
+
+        GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
+        GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
+
+        ReadClipper lowQualClipper = new ReadClipper(read);
+        ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
    }

-    @Test(enabled = false)
+    private void testNoLowQualBases(GATKSAMRecord read, byte low_qual) {
+        if (!read.isEmpty()) {
+            byte [] quals = read.getBaseQualities();
+            for (int i=0; i<quals.length; i++)
+                Assert.assertFalse(quals[i] <= low_qual, String.format("Found low qual (%d) base after hard clipping. Position: %d -- %s", low_qual, i, read.getCigarString()));
+        }
+    }
+
+    @Test(enabled = true)
    public void testHardClipSoftClippedBases() {

        // Generate a list of cigars to test
-        for (Cigar cigar : ClipReadsTestUtils.generateCigars()) {
-            //logger.warn("Testing Cigar: "+cigar.toString());
-            readClipper = new ReadClipper(ClipReadsTestUtils.makeReadFromCigar(cigar));
+        for (Cigar cigar : cigarList) {
+            GATKSAMRecord read = ClipReadsTestUtils.makeReadFromCigar(cigar);
+            ReadClipper readClipper = new ReadClipper(read);
+            GATKSAMRecord clippedRead = readClipper.hardClipSoftClippedBases();

-            int clipStart = 0;
-            int clipEnd = 0;
-            boolean expectEmptyRead = false;
+            int sumHardClips = 0;
+            int sumMatches = 0;

-            List<CigarElement> cigarElements = cigar.getCigarElements();
-            int CigarListLength = cigarElements.size();
+            boolean tail = true;
+            for (CigarElement element : read.getCigar().getCigarElements()) {
+                // Assuming cigars are well formed, if we see S or H, it means we're on the tail (left or right)
+                if (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP)
+                    tail = true;

-            // It will know what needs to be clipped based on the start and end of the string, hardclips and softclips
-            // are added to the amount to clip
-            if (cigarElements.get(0).getOperator() == CigarOperator.HARD_CLIP) {
-                //clipStart += cigarElements.get(0).getLength();
-                if (cigarElements.get(1).getOperator() == CigarOperator.SOFT_CLIP) {
-                    clipStart += cigarElements.get(1).getLength();
-                    // Check for leading indel
-                    if (cigarElements.get(2).getOperator() == CigarOperator.INSERTION) {
-                        expectEmptyRead = true;
-                    }
-                }
-                // Check for leading indel
-                else if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
-                    expectEmptyRead = true;
-                }
-            } else if (cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP) {
-                clipStart += cigarElements.get(0).getLength();
-                // Check for leading indel
-                if (cigarElements.get(1).getOperator() == CigarOperator.INSERTION) {
-                    expectEmptyRead = true;
-                }
-            }
-            //Check for leading indel
-            else if (cigarElements.get(0).getOperator() == CigarOperator.INSERTION) {
-                expectEmptyRead = true;
+                // Adds all H, S and D's (next to hard/soft clips).
+                // All these should be hard clips after clipping.
+                if (tail && (element.getOperator() == CigarOperator.HARD_CLIP || element.getOperator() == CigarOperator.SOFT_CLIP || element.getOperator() == CigarOperator.DELETION))
+                    sumHardClips += element.getLength();
+
+                // this means we're no longer on the tail (insertions can still potentially be the tail because
+                // of the current contract of clipping out hanging insertions
+                else if (element.getOperator() != CigarOperator.INSERTION)
+                    tail = false;
+
+                // Adds all matches to verify that they remain the same after clipping
+                if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
+                    sumMatches += element.getLength();
            }

-            if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.HARD_CLIP) {
-                //clipEnd += cigarElements.get(CigarListLength - 1).getLength();
-                if (cigarElements.get(CigarListLength - 2).getOperator() == CigarOperator.SOFT_CLIP)
-                    clipEnd += cigarElements.get(CigarListLength - 2).getLength();
-            } else if (cigarElements.get(CigarListLength - 1).getOperator() == CigarOperator.SOFT_CLIP)
-                clipEnd += cigarElements.get(CigarListLength - 1).getLength();
+            for (CigarElement element : clippedRead.getCigar().getCigarElements()) {
+                // Test if clipped read has Soft Clips (shouldn't have any!)
+                Assert.assertTrue( element.getOperator() != CigarOperator.SOFT_CLIP, String.format("Cigar %s -> %s -- FAILED (resulting cigar has soft clips)", read.getCigarString(), clippedRead.getCigarString()));

-            String readBases = readClipper.read.getReadString();
-            String baseQuals = readClipper.read.getBaseQualityString();
+                // Keep track of the total number of Hard Clips after clipping to make sure everything was accounted for
+                if (element.getOperator() == CigarOperator.HARD_CLIP)
+                    sumHardClips -= element.getLength();

-            // "*" is the default empty-sequence-string and for our test it needs to be changed to  ""
-            if (readBases.equals("*"))
-                readBases = "";
-            if (baseQuals.equals("*"))
-                baseQuals = "";
+                // Make sure all matches are still there
+                if (element.getOperator() == CigarOperator.MATCH_OR_MISMATCH)
+                    sumMatches -= element.getLength();
+            }
+            Assert.assertTrue( sumHardClips == 0, String.format("Cigar %s -> %s -- FAILED (number of hard clips mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumHardClips));
+            Assert.assertTrue( sumMatches == 0, String.format("Cigar %s -> %s -- FAILED (number of matches mismatched by %d)", read.getCigarString(), clippedRead.getCigarString(), sumMatches));

-            logger.warn(String.format("Testing cigar %s, expecting Base: %s and Qual: %s",
-                    cigar.toString(), readBases.substring(clipStart, readBases.length() - clipEnd),
-                    baseQuals.substring(clipStart, baseQuals.length() - clipEnd)));
-            //if (expectEmptyRead)
-            //    testBaseQual( readClipper.hardClipSoftClippedBases(), new byte[0], new byte[0] );
-            //else
-            ClipReadsTestUtils.testBaseQual(readClipper.hardClipSoftClippedBases(),
-                    readBases.substring(clipStart, readBases.length() - clipEnd).getBytes(),
-                    baseQuals.substring(clipStart, baseQuals.length() - clipEnd).getBytes());
-            logger.warn("Cigar: " + cigar.toString() + " PASSED!");
+
+//            logger.warn(String.format("Cigar %s -> %s -- PASSED!", read.getCigarString(), clippedRead.getCigarString()));
        }
-        // We will use testParameter in the following way
-        // Right tail, left tail,
-
    }
 }
--- a/public/java/test/org/broadinstitute/sting/utils/clipreads/TestParameter.java
+++ b/public/java/test/org/broadinstitute/sting/utils/clipreads/TestParameter.java
@ -1,24 +0,0 @@
-package org.broadinstitute.sting.utils.clipreads;
-
-/**
- * Created by IntelliJ IDEA.
- * User: roger
- * Date: 11/28/11
- * Time: 4:07 PM
- * To change this template use File | Settings | File Templates.
- */
-public class TestParameter {
-    int inputStart;
-    int inputStop;
-    int substringStart;
-    int substringStop;
-    String cigar;
-
-    public TestParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
-        inputStart = InputStart;
-        inputStop = InputStop;
-        substringStart = SubstringStart;
-        substringStop = SubstringStop;
-        cigar = Cigar;
-    }
-}
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUnitTest.java
@ -704,11 +704,14 @@ public class VariantContextUnitTest extends BaseTest {
    public Object[][] MakeSubContextTest() {
        for ( boolean updateAlleles : Arrays.asList(true, false)) {
            new SubContextTest(Collections.<String>emptySet(), updateAlleles);
+            new SubContextTest(Collections.singleton("MISSING"), updateAlleles);
            new SubContextTest(Collections.singleton("AA"), updateAlleles);
            new SubContextTest(Collections.singleton("AT"), updateAlleles);
            new SubContextTest(Collections.singleton("TT"), updateAlleles);
            new SubContextTest(Arrays.asList("AA", "AT"), updateAlleles);
            new SubContextTest(Arrays.asList("AA", "AT", "TT"), updateAlleles);
+            new SubContextTest(Arrays.asList("AA", "AT", "MISSING"), updateAlleles);
+            new SubContextTest(Arrays.asList("AA", "AT", "TT", "MISSING"), updateAlleles);
        }

        return SubContextTest.getTests(SubContextTest.class);