BitSet implementation of the on-the-fly recalibration using the CSV format file.

Infrastructure:
   * Added static interface to all different clipping algorithms of low quality tail clipping
   * Added reverse direction pileup element event lookup (indels) to the PileupElement and LocusIteratorByState
   * Complete refactor of the KeyManager. Much cleaner implementation that handles keys with no optional covariates (necessary for on-the-fly recalibration)
   * EventType is now an independent enum with added capabilities. All functionality is now centralized.

 BQSR and RecalibrateBases:
   * On-the-fly recalibration is now generic and uses the same bit set structure as BQSR for a reduced memory footprint
   * Refactored the object creation to take advantage of the compact key structure
   * Replaced nested hash maps with single hash maps indexed by bitsets
   * Eliminated low quality tails from the context covariate (using ReadClipper's write N's algorithm).
   * Excluded contexts with N's from the output file.
   * Fixed cycle covariate for discrete platforms (need to check flow cycle platforms now!)
   * Redefined error for indels to look at the previous base in negative strand reads (using new PE functionality)
   * Added the covariate ID (for optional covariates) to the output for disambiguation purposes
   * Refactored CovariateKeySet -- eventType functionality is now handled by the EventType enum.
   * Reduced memory usage of the BQSR script to 4

 Tests:
   * Refactored BQSRKeyManagerUnitTest to handle the new implementation of the key manager
   * Added tests for keys without optional covariates
   * Added tests for on-the-fly recalibration (but more tests are necessary)
This commit is contained in:
Mauricio Carneiro 2012-03-08 14:52:28 -05:00
parent ca11ab39e7
commit 3bfca0ccfd
28 changed files with 826 additions and 508 deletions

View File

@ -179,6 +179,11 @@ public class LocusIteratorByState extends LocusIterator {
return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement );
}
/**
 * Peeks at the previous cigar element on the genome, mirroring peekForwardOnGenome().
 *
 * Only steps back to the previous cigar element when we are on the first base of the
 * current element (cigarElementCounter - 1 == 0); otherwise the current element is returned.
 *
 * Fix: the previous-element index test was "cigarOffset - 1 > 0", which wrongly excluded
 * the first cigar element (index 0). A previous index is valid whenever it is >= 0 —
 * compare peekForwardOnGenome(), which uses the analogous valid-index test
 * "cigarOffset + 1 < nCigarElements".
 *
 * @return the previous CigarElement when one exists and we sit on the first base of the
 *         current element; the current element otherwise
 */
public CigarElement peekBackwardOnGenome() {
    return ( cigarElementCounter - 1 == 0 && cigarOffset - 1 >= 0 ? cigar.getCigarElement(cigarOffset - 1) : curElement );
}
public CigarOperator stepForwardOnGenome() {
// we enter this method with readOffset = index of the last processed base on the read
// (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion
@ -401,24 +406,24 @@ public class LocusIteratorByState extends LocusIterator {
while (iterator.hasNext()) {
final SAMRecordState state = iterator.next();
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
final int readOffset = state.getReadOffset(); // the base offset on this read
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
final int readOffset = state.getReadOffset(); // the base offset on this read
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
final int eventLength = state.getEventLength();
if (op == CigarOperator.N) // N's are never added to any pileup
if (op == CigarOperator.N) // N's are never added to any pileup
continue;
if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref
if (state.hadIndel()) { // this read has an indel associated with the previous position on the ref
size++;
ExtendedEventPileupElement pileupElement;
if (state.getEventBases() == null) { // Deletion event
if (state.getEventBases() == null) { // Deletion event
nDeletions++;
maxDeletionLength = Math.max(maxDeletionLength, state.getEventLength());
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength);
}
else { // Insertion event
else { // Insertion event
nInsertions++;
pileupElement = new ExtendedEventPileupElement(read, eventStartOffset, eventLength, state.getEventBases());
}
@ -442,10 +447,10 @@ public class LocusIteratorByState extends LocusIterator {
if (indelPile.size() != 0)
fullExtendedEventPileup.put(sample, new ReadBackedExtendedEventPileupImpl(loc, indelPile, size, maxDeletionLength, nInsertions, nDeletions, nMQ0Reads));
}
hasExtendedEvents = false; // we are done with extended events prior to current ref base
hasExtendedEvents = false; // we are done with extended events prior to current ref base
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
}
else { // this is a regular event pileup (not extended)
else { // this is a regular event pileup (not extended)
GenomeLoc location = getLocation();
Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
boolean hasBeenSampled = false;
@ -454,27 +459,34 @@ public class LocusIteratorByState extends LocusIterator {
List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
size = 0; // number of elements in this sample's pileup
nDeletions = 0; // number of deletions in this sample's pileup
nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
size = 0; // number of elements in this sample's pileup
nDeletions = 0; // number of deletions in this sample's pileup
nMQ0Reads = 0; // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
while (iterator.hasNext()) {
final SAMRecordState state = iterator.next(); // state object with the read/offset information
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element
final CigarOperator nextOp = nextElement.getOperator();
final int readOffset = state.getReadOffset(); // the base offset on this read
final SAMRecordState state = iterator.next(); // state object with the read/offset information
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
final CigarElement nextElement = state.peekForwardOnGenome(); // next cigar element
final CigarElement lastElement = state.peekBackwardOnGenome(); // last cigar element
final CigarOperator nextOp = nextElement.getOperator(); // next cigar operator
final CigarOperator lastOp = lastElement.getOperator(); // last cigar operator
final int readOffset = state.getReadOffset(); // the base offset on this read
final boolean isBeforeDeletion = nextOp == CigarOperator.DELETION;
final boolean isAfterDeletion = lastOp == CigarOperator.DELETION;
final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
final boolean isAfterInsertion = lastOp == CigarOperator.INSERTION;
final boolean isNextToSoftClip = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());
int nextElementLength = nextElement.getLength();
if (op == CigarOperator.N) // N's are never added to any pileup
if (op == CigarOperator.N) // N's are never added to any pileup
continue;
if (op == CigarOperator.D) {
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
pile.add(new PileupElement(read, readOffset, true, nextOp == CigarOperator.D, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart()),
null,nextOp == CigarOperator.D? nextElementLength:-1));
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1));
size++;
nDeletions++;
if (read.getMappingQuality() == 0)
@ -484,11 +496,10 @@ public class LocusIteratorByState extends LocusIterator {
else {
if (!filterBaseInRead(read, location.getStart())) {
String insertedBaseString = null;
if (nextOp == CigarOperator.I) {
if (nextOp == CigarOperator.I)
insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + 1, readOffset + 1 + nextElement.getLength()));
}
pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.D, nextOp == CigarOperator.I, nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart()),
insertedBaseString,nextElementLength));
pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
size++;
if (read.getMappingQuality() == 0)
nMQ0Reads++;

View File

@ -0,0 +1,284 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BitSetUtils;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.LinkedList;
import java.util.List;
/**
* This class provides all the functionality for the BitSet representation of the keys to the hash table of BQSR
*
* It also handles the event type "covariate" which is not exactly a covariate, but is added as a key to the hashmap. The Key Manager will
* add the event type as a bitset to the end of the covariate bitset key. This way, it won't get in the way of masking the information
* out of the key for the actual covariates, and having the covariates handle it. The key manager handles the event type.
*
* The keys represented by this key manager will always have the same order:
*
* RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate1, OptionalCovariateID, EventType
* RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariate2, OptionalCovariateID, EventType
* ...
* RequiredCovariate1, RequiredCovariate2, ..., RequiredCovariateN, OptionalCovariateN, OptionalCovariateID, EventType
*
*
* Note that Optional Covariates are optional, and the Key Manager should operate without them if necessary.
*
* @author Mauricio Carneiro
* @since 3/6/12
*/
public class BQSRKeyManager {
    private final List<RequiredCovariateInfo> requiredCovariates;
    private final List<OptionalCovariateInfo> optionalCovariates;

    private final int nRequiredBits;                                                                    // number of bits used to represent the required covariates
    private final int nOptionalBits;                                                                    // number of bits used to represent the optional covariates (sized by the biggest one)
    private final int nOptionalIDBits;                                                                  // number of bits used to represent the optional covariate IDs
    private final int totalNumberOfBits;                                                                // sum of all of the above plus the event bits

    private final BitSet optionalCovariateMask;                                                         // standard mask for the optional covariate bits
    private final BitSet optionalCovariateIDMask;                                                       // standard mask for the optional covariate ID bits

    /**
     * Initializes the KeyManager with the covariates to use.
     *
     * Required covariates are laid out back to back; optional covariates share one slot
     * (sized for the biggest of them) and are disambiguated by a covariate ID that follows
     * the optional covariate bits. The event type always occupies the final bits.
     *
     * @param requiredCovariates the ordered list of required covariates
     * @param optionalCovariates the ordered list of optional covariates (may be empty)
     */
    public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
        this.requiredCovariates = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size());      // initialize the required covariates list
        this.optionalCovariates = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size());      // initialize the optional covariates list (size may be 0, that's okay)

        int requiredBits = 0;                                                                           // running offset of the next required covariate in the combined key
        for (Covariate required : requiredCovariates) {                                                 // build the required covariate list with the extra information for key management
            int nBits = required.numberOfBits();                                                        // number of bits used by this covariate
            BitSet mask = genericMask(requiredBits, nBits);                                             // the mask to extract this covariate from a combined key
            this.requiredCovariates.add(new RequiredCovariateInfo(requiredBits, nBits, mask, required));
            requiredBits += nBits;
        }
        nRequiredBits = requiredBits;

        short id = 0;                                                                                   // optional covariates are identified by their position in the list
        int optionalBits = 0;
        for (Covariate optional : optionalCovariates) {
            optionalBits = Math.max(optionalBits, optional.numberOfBits());                             // optional covariates are represented by the number of bits needed by the biggest covariate
            BitSet optionalID = BitSetUtils.bitSetFrom(id);                                             // the optional covariate ID for this covariate
            this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional));               // optional covariates have a standardized mask and bit count, so only the ID needs caching
            id++;
        }
        nOptionalBits = optionalBits;

        nOptionalIDBits = BitSetUtils.numberOfBitsToRepresent(optionalCovariates.size());               // number of bits used to represent the covariate ID
        optionalCovariateMask = genericMask(nRequiredBits, nOptionalBits);                              // the generic mask to extract optional covariate bits from the combined bitset
        optionalCovariateIDMask = genericMask(nRequiredBits + nOptionalBits, nOptionalIDBits);          // the generic mask to extract optional covariate ID bits from the combined bitset
        totalNumberOfBits = nRequiredBits + nOptionalBits + nOptionalIDBits + bitsInEventType();        // total number of bits used in the final key
    }

    /**
     * Generates one key per optional covariate.
     *
     * Keys include all required covariates, one optional covariate (plus its ID) and the event type.
     *
     * Example allKeys:
     *   RG, QUAL, CYCLE, CONTEXT
     *
     * List of BitSets returned by this example (given eventType):
     *   RG, QUAL, CYCLE, EVENT
     *   RG, QUAL, CONTEXT, EVENT
     *
     * Note: if there are no optional covariates, only one bitset key is returned, holding
     * all the required covariates and the event type.
     *
     * @param allKeys   the keys in bitset representation for each covariate (required first, then optional; optional entries may be null)
     * @param eventType the type of event described by this keyset (e.g. mismatches, insertions, deletions)
     * @return one key in bitset representation per optional covariate (or a single key when there are none)
     */
    public List<BitSet> bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) {
        List<BitSet> allBitSets = new LinkedList<BitSet>();                                             // generate one key per optional covariate
        BitSet eventBitSet = BitSetUtils.bitSetFrom(eventType.index);                                   // create a bitset with the event type
        int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits;                        // location in the bit set to add the event type bits

        int covariateIndex = 0;
        BitSet requiredKey = new BitSet(nRequiredBits);                                                 // a bitset holding all the required keys, to be replicated for each optional covariate
        for (RequiredCovariateInfo infoRequired : requiredCovariates)
            addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore);  // add all the required covariates to the key set

        for (OptionalCovariateInfo infoOptional : optionalCovariates) {
            BitSet covariateKey = allKeys[covariateIndex++];                                            // get the bitset from all keys
            if (covariateKey == null)
                continue;                                                                               // do not add nulls to the final set of keys

            BitSet optionalKey = new BitSet(totalNumberOfBits);                                         // create a new key for this optional covariate
            optionalKey.or(requiredKey);                                                                // import all the required covariates
            addBitSetToKeyAtLocation(optionalKey, covariateKey, nRequiredBits);                         // add the optional covariate right after the required covariates
            addBitSetToKeyAtLocation(optionalKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covariate
            addBitSetToKeyAtLocation(optionalKey, eventBitSet, eventTypeBitIndex);                      // add the event type
            allBitSets.add(optionalKey);                                                                // add this key to the list of keys
        }

        if (optionalCovariates.isEmpty()) {                                                             // special case: no optional covariates, so add the event type to the required key (our only key)
            addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex);                      // add the event type
            allBitSets.add(requiredKey);                                                                // add this key to the list of keys
        }
        return allBitSets;
    }

    /**
     * Generates one bitset key for the covariate values represented in Object[] key.
     *
     * The array holds the actual objects produced by the covariates (e.g. read back from the
     * recalibration data file) and contains all required covariates plus one (or zero) optional
     * covariates. Therefore the product is a single bitset key.
     *
     * Example key:
     *   RG, QUAL, CYCLE, CYCLE_ID, EventType
     *
     * @param key objects produced by the required covariates, followed by one or zero optional
     *            covariates (and its ID), with the event type always last
     * @return a bitset key representing these objects; encoding is delegated to each covariate's interface
     */
    public BitSet bitSetFromKey(Object[] key) {
        BitSet bitSetKey = new BitSet(totalNumberOfBits);

        int requiredCovariate = 0;
        for (RequiredCovariateInfo infoRequired : requiredCovariates) {
            BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]);    // create a bitset from the object key using the required covariate's interface
            addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore);              // add it to the bitset key
        }

        if (!optionalCovariates.isEmpty()) {
            int optionalCovariate = requiredCovariates.size();                                          // the optional covariate index in the key array
            int covariateIDIndex = optionalCovariate + 1;                                               // the optional covariate ID index is right after the optional covariate's
            int covariateID = (Short) key[covariateIDIndex];                                            // get the optional covariate id
            OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID);                   // so we can get the optional covariate information
            BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]);      // convert the optional covariate key into a bitset using the covariate's interface
            addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits);                        // add the optional covariate right after the required covariates
            addBitSetToKeyAtLocation(bitSetKey, infoOptional.covariateID, nRequiredBits + nOptionalBits); // add the optional covariate ID right after the optional covariate
        }

        int eventIndex = key.length - 1;                                                                // the event type is always the last key
        int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits;                        // location in the bit set to add the event type bits
        BitSet eventBitSet = bitSetFromEvent((EventType) key[eventIndex]);                              // the bit set representation of the event type
        addBitSetToKeyAtLocation(bitSetKey, eventBitSet, eventTypeBitIndex);                            // add the event type

        return bitSetKey;
    }

    /**
     * Generates a key set of objects from a combined bitset key.
     *
     * Masks out each covariate independently and decodes its value (Object) into a keyset.
     *
     * @param key the bitset representation of the keys
     * @return an object list with the decoded value for each key (required covariates, then the
     *         optional covariate and its id if any, then the event type)
     */
    public List<Object> keySetFrom(BitSet key) {
        List<Object> objectKeys = new ArrayList<Object>();
        for (RequiredCovariateInfo info : requiredCovariates) {
            BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore);             // get the covariate's bitset
            objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet));                              // convert the bitset to an object using the covariate's interface
        }
        if (!optionalCovariates.isEmpty()) {
            BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits);         // mask out the covariate bit set
            BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits); // mask out the covariate ID (to identify which covariate this is)
            short id = BitSetUtils.shortFrom(idbs);                                                     // convert the id bitset into a short
            Covariate covariate = optionalCovariates.get(id).covariate;                                 // get the corresponding optional covariate object
            objectKeys.add(covariate.keyFromBitSet(covBitSet));                                         // add the optional covariate to the key set
            objectKeys.add(id);                                                                         // add the covariate id
        }
        objectKeys.add(eventFromBitSet(key));                                                           // add the event type object to the key set

        return objectKeys;
    }

    /**
     * Translates a masked bitset into a bitset starting at 0.
     *
     * @param key the masked-out bitset
     * @param n   the number of leading bits to chop off
     * @return a translated bitset starting at 0 for the covariate machinery to decode
     */
    private BitSet chopNBitsFrom(BitSet key, int n) {
        BitSet choppedKey = new BitSet();
        for (int i = key.nextSetBit(0); i >= 0; i = key.nextSetBit(i + 1))
            choppedKey.set(i - n);                                                                      // shift every set bit down to the beginning of the BitSet
        return choppedKey;
    }

    /**
     * Creates a mask to extract a covariate's bits from a combined bitset key.
     *
     * @param leadingBits the number of bits preceding the covariate in the combined key
     * @param nBits       the number of bits the covariate needs to represent its values
     * @return a mask with exactly the covariate's bit range set
     */
    private BitSet genericMask(int leadingBits, int nBits) {
        BitSet mask = new BitSet(leadingBits + nBits);
        mask.set(leadingBits, leadingBits + nBits);
        return mask;
    }

    /**
     * Decodes the event type (enum) from the full bitset key.
     *
     * @param fullKey the full key of all covariates + event type
     * @return the decoded event type
     */
    private EventType eventFromBitSet(BitSet fullKey) {
        BitSet eventKey = new BitSet();
        int firstBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits;                            // the event type occupies the final bits of the key
        for (int i = fullKey.nextSetBit(firstBitIndex); i >= 0; i = fullKey.nextSetBit(i + 1))
            eventKey.set(i - firstBitIndex);
        return EventType.eventFrom(BitSetUtils.shortFrom(eventKey));
    }

    // Encodes an event type into its bitset representation (via its enum index).
    private BitSet bitSetFromEvent(EventType eventType) {
        return BitSetUtils.bitSetFrom(eventType.index);
    }

    // Number of bits needed to represent any event type in the key.
    private int bitsInEventType() {
        return BitSetUtils.numberOfBitsToRepresent(EventType.values().length);
    }

    // ORs bitSet into key, shifted so its bit 0 lands at 'location' in the full key.
    private void addBitSetToKeyAtLocation(BitSet key, BitSet bitSet, int location) {
        for (int j = bitSet.nextSetBit(0); j >= 0; j = bitSet.nextSetBit(j + 1))
            key.set(j + location);                                                                      // translate the bits set in the key to their corresponding position in the full key
    }

    // Masks out one covariate's bits from the combined key and shifts them back down to bit 0.
    private BitSet extractBitSetFromKey(BitSet key, BitSet mask, int leadingBits) {
        BitSet bitSet = (BitSet) key.clone();
        bitSet.and(mask);
        return chopNBitsFrom(bitSet, leadingBits);
    }

    /**
     * Aggregate information for each required covariate.
     *
     * Static nested class: it never references the enclosing BQSRKeyManager instance,
     * so carrying the hidden outer-instance reference would only waste memory.
     */
    static class RequiredCovariateInfo {
        public final int bitsBefore;                                                                    // number of bits before this covariate in the combined bitset key
        public final int nBits;                                                                         // number of bits used by this covariate (cached access to covariate.numberOfBits())
        public final BitSet mask;                                                                       // the mask to pull this covariate out of the combined bitset key (made from bitsBefore and nBits)
        public final Covariate covariate;                                                               // allows reverse lookup of the covariates in order

        RequiredCovariateInfo(int bitsBefore, int nBits, BitSet mask, Covariate covariate) {
            this.bitsBefore = bitsBefore;
            this.nBits = nBits;
            this.mask = mask;
            this.covariate = covariate;
        }
    }

    /**
     * Aggregate information for each optional covariate (its cached ID and the covariate itself).
     */
    static class OptionalCovariateInfo {
        public final BitSet covariateID;                                                                // cached bitset form of the covariate ID
        public final Covariate covariate;

        OptionalCovariateInfo(BitSet covariateID, Covariate covariate) {
            this.covariateID = covariateID;
            this.covariate = covariate;
        }
    }
}

View File

@ -27,6 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -46,7 +48,9 @@ public class ContextCovariate implements StandardCovariate {
private int deletionsContextSize;
private final BitSet NO_CONTEXT_BITSET = BitSetUtils.bitSetFrom(-1L);
protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
// protected final String NO_CONTEXT_VALUE = "N"; // protected so we can UNIT TEST it
private byte LOW_QUAL_TAIL;
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
@ -55,18 +59,22 @@ public class ContextCovariate implements StandardCovariate {
insertionsContextSize = RAC.INSERTIONS_CONTEXT_SIZE;
deletionsContextSize = RAC.DELETIONS_CONTEXT_SIZE;
LOW_QUAL_TAIL = RAC.LOW_QUAL_TAIL;
if (mismatchesContextSize <= 0 || insertionsContextSize <= 0 || deletionsContextSize <= 0)
throw new UserException(String.format("Context Size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Insertions: %d Deletions:%d", mismatchesContextSize, insertionsContextSize, deletionsContextSize));
}
@Override
public CovariateValues getValues(final GATKSAMRecord read) {
public CovariateValues getValues(GATKSAMRecord read) {
int l = read.getReadLength();
BitSet[] mismatches = new BitSet[l];
BitSet[] insertions = new BitSet[l];
BitSet[] deletions = new BitSet[l];
read = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS); // Write N's over the low quality tail of the reads to avoid adding them into the context
final boolean negativeStrand = read.getReadNegativeStrandFlag();
byte[] bases = read.getReadBases();
if (negativeStrand)
@ -94,11 +102,17 @@ public class ContextCovariate implements StandardCovariate {
@Override
public String keyFromBitSet(BitSet key) {
if (key.equals(NO_CONTEXT_BITSET))
return NO_CONTEXT_VALUE;
if (key == null) // this can only happen in test routines because we do not propagate null keys to the csv file
return null;
return BitSetUtils.dnaFrom(key);
}
/**
 * Encodes a context value into its BitSet representation.
 *
 * The key is cast to String, so it is expected to be the DNA-context string form of this
 * covariate (as produced by keyFromBitSet / read back from the recalibration file —
 * presumably; confirm against callers). Encoding is delegated to BitSetUtils.
 *
 * @param key the context value, expected to be a String
 * @return the BitSet encoding of the context
 */
@Override
public BitSet bitSetFromKey(Object key) {
return BitSetUtils.bitSetFrom((String) key);
}
@Override
public int numberOfBits() {
return Long.bitCount(-1L);
@ -113,7 +127,7 @@ public class ContextCovariate implements StandardCovariate {
* @return the bitSet representing the Context
*/
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
BitSet result = NO_CONTEXT_BITSET;
BitSet result = null;
if (offset >= contextSize) {
String context = new String(Arrays.copyOfRange(bases, offset - contextSize, offset));
if (!context.contains("N"))

View File

@ -71,6 +71,17 @@ public interface Covariate {
*/
public String keyFromBitSet(BitSet key);
/**
* Converts a key into a bitset
*
* Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in bitset format. For counting covariates
* the getValues method already returns all values in BitSet format.
*
* @param key the object corresponding to the covariate
* @return a bitset representation of the object
*/
public BitSet bitSetFromKey(Object key);
/**
* Each covariate should determine how many bits are necessary to encode its data
*

View File

@ -1,108 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.BitSet;
import java.util.HashMap;
/**
* The object temporarily held by a read that describes all of its covariates.
*
* In essence, this is an array of CovariateValues, but it also has some functionality to deal with the optimizations of the NestedHashMap
*
* @author Mauricio Carneiro
* @since 2/8/12
*/
public class CovariateKeySet {
    private BitSet[][] mismatchesKeySet;        // [read position][covariate] keys for mismatch events
    private BitSet[][] insertionsKeySet;        // [read position][covariate] keys for insertion events
    private BitSet[][] deletionsKeySet;         // [read position][covariate] keys for deletion events
    private int nextCovariateIndex;             // the next covariate column to be filled by addCovariate()

    // NOTE(review): nothing in this class ever populates these two maps (the initialization
    // code was commented out), so errorModelFrom() and eventNameFrom() will always throw.
    // Confirm they are populated elsewhere or that these lookup paths are dead code.
    private static HashMap<String, RecalDataManager.BaseRecalibrationType> nameToType = new HashMap<String, RecalDataManager.BaseRecalibrationType>();
    private static HashMap<BitSet, String> bitSetToName = new HashMap<BitSet, String>();

    /**
     * Allocates the per-position, per-covariate key tables for one read.
     *
     * @param readLength         number of bases in the read (rows of each table)
     * @param numberOfCovariates number of covariate columns per position
     */
    public CovariateKeySet(int readLength, int numberOfCovariates) {
        this.mismatchesKeySet = new BitSet[readLength][numberOfCovariates];
        this.insertionsKeySet = new BitSet[readLength][numberOfCovariates];
        this.deletionsKeySet = new BitSet[readLength][numberOfCovariates];
        this.nextCovariateIndex = 0;
    }

    /**
     * Adds one covariate's per-position values to the next free column of all three tables.
     *
     * @param covariate the per-position values (mismatches, insertions, deletions) for one covariate
     */
    public void addCovariate(CovariateValues covariate) {
        transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
        transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
        transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
        nextCovariateIndex++;
    }

    /**
     * Looks up the base recalibration model for a model name string.
     *
     * @param modelString the model name
     * @return the corresponding BaseRecalibrationType
     * @throws ReviewedStingException if the string is not a recognized model name
     */
    public static RecalDataManager.BaseRecalibrationType errorModelFrom(final String modelString) {
        if (!nameToType.containsKey(modelString))
            throw new ReviewedStingException("Unrecognized Base Recalibration model string: " + modelString);
        return nameToType.get(modelString);
    }

    /**
     * Looks up the event name for an event-type bitset.
     *
     * @param bitSet the event type in bitset form
     * @return the corresponding event name
     * @throws ReviewedStingException if the bitset is not a recognized event type
     */
    public static String eventNameFrom(final BitSet bitSet) {
        if (!bitSetToName.containsKey(bitSet))
            throw new ReviewedStingException("Unrecognized Event Type BitSet: " + bitSet);
        return bitSetToName.get(bitSet);
    }

    /**
     * Returns the covariate keys for a read position under the given error model.
     *
     * @param readPosition the base offset within the read
     * @param errorModel   which event table to read (substitution, insertion or deletion)
     * @return the array of covariate keys for that position
     * @throws ReviewedStingException if the error model is not recognized
     */
    public BitSet[] getKeySet(final int readPosition, final RecalDataManager.BaseRecalibrationType errorModel) {
        switch (errorModel) {
            case BASE_SUBSTITUTION:
                return getMismatchesKeySet(readPosition);
            case BASE_INSERTION:
                return getInsertionsKeySet(readPosition);
            case BASE_DELETION:
                return getDeletionsKeySet(readPosition);
            default:
                throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel);
        }
    }

    // Covariate keys for one read position in the mismatches table.
    public BitSet[] getMismatchesKeySet(int readPosition) {
        return mismatchesKeySet[readPosition];
    }

    // Covariate keys for one read position in the insertions table.
    public BitSet[] getInsertionsKeySet(int readPosition) {
        return insertionsKeySet[readPosition];
    }

    // Covariate keys for one read position in the deletions table.
    public BitSet[] getDeletionsKeySet(int readPosition) {
        return deletionsKeySet[readPosition];
    }

    // Writes a covariate's per-position values (one row per position) into the
    // current column (nextCovariateIndex) of the given table.
    private void transposeCovariateValues(BitSet[][] keySet, BitSet[] covariateValues) {
        for (int i = 0; i < covariateValues.length; i++)
            keySet[i][nextCovariateIndex] = covariateValues[i];
    }
}

View File

@ -69,35 +69,12 @@ public class CycleCovariate implements StandardCovariate {
final short init;
final short increment;
if (!read.getReadNegativeStrandFlag()) {
// Differentiate between first and second of pair.
// The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
// to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair.
// Therefore the cycle covariate must differentiate between first and second of pair reads.
// This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because
// the current sequential model would consider the effects independently instead of jointly.
if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
//second of pair, positive strand
init = -1;
increment = -1;
}
else {
//first of pair, positive strand
init = 1;
increment = 1;
}
init = 1;
increment = 1;
}
else {
if (read.getReadPairedFlag() && read.getSecondOfPairFlag()) {
//second of pair, negative strand
init = (short) -read.getReadLength();
increment = 1;
}
else {
//first of pair, negative strand
init = (short) read.getReadLength();
increment = -1;
}
init = (short) read.getReadLength();
increment = -1;
}
short cycle = init;
@ -121,7 +98,7 @@ public class CycleCovariate implements StandardCovariate {
// the current sequential model would consider the effects independently instead of jointly.
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
short cycle = multiplyByNegative1 ? (short) -1 : 1;
short cycle = multiplyByNegative1 ? (short) -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
@ -201,7 +178,7 @@ public class CycleCovariate implements StandardCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override
public final Object getValue(final String str) {
return Integer.parseInt(str);
return Short.parseShort(str);
}
@Override
@ -209,6 +186,11 @@ public class CycleCovariate implements StandardCovariate {
return String.format("%d", BitSetUtils.shortFrom(key));
}
@Override
public BitSet bitSetFromKey(Object key) {
return BitSetUtils.bitSetFrom((Short) key);
}
@Override
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(2 * Short.MAX_VALUE); // positive and negative

View File

@ -0,0 +1,43 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
 * The types of base errors the recalibrator models, each with a stable integer index
 * (used to address per-event tables) and a one-letter representation (used in the
 * recalibration CSV / report files).
 */
public enum EventType {
    BASE_SUBSTITUTION(0, "M"),
    BASE_INSERTION(1, "I"),
    BASE_DELETION(2, "D");

    // final: enum constants are shared singletons, so these must never be reassigned.
    public final int index;
    public final String representation;

    private EventType(int index, String representation) {
        this.index = index;
        this.representation = representation;
    }

    /**
     * Looks up the event type by its declared index.
     *
     * @param index the stable integer index of the event
     * @return the matching event type
     * @throws ReviewedStingException if no event declares this index
     */
    public static EventType eventFrom(int index) {
        for (final EventType eventType : EventType.values())
            if (eventType.index == index)
                return eventType;
        throw new ReviewedStingException(String.format("Event %d does not exist.", index));
    }

    /**
     * Looks up the event type by its one-letter representation (e.g. "M", "I", "D").
     *
     * @param event the representation string
     * @return the matching event type
     * @throws ReviewedStingException if no event uses this representation
     */
    public static EventType eventFrom(String event) {
        for (final EventType eventType : EventType.values())
            if (eventType.representation.equals(event))
                return eventType;
        throw new ReviewedStingException(String.format("Event %s does not exist.", event));
    }

    @Override
    public String toString() {
        return representation;
    }
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.BitSet;
@ -40,8 +41,6 @@ import java.util.BitSet;
public class QualityScoreCovariate implements RequiredCovariate {
private final int MAX_QUAL = 50;
// Initialize any member variables using the command-line arguments passed to the walkers
@Override
public void initialize(final RecalibrationArgumentCollection RAC) {
@ -71,7 +70,7 @@ public class QualityScoreCovariate implements RequiredCovariate {
// Used to get the covariate's value from input csv file during on-the-fly recalibration
@Override
public final Object getValue(final String str) {
return Integer.parseInt(str);
return Byte.parseByte(str);
}
@Override
@ -79,8 +78,13 @@ public class QualityScoreCovariate implements RequiredCovariate {
return String.format("%d", BitSetUtils.longFrom(key));
}
@Override
public BitSet bitSetFromKey(Object key) {
return BitSetUtils.bitSetFrom((Byte) key);
}
@Override
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(MAX_QUAL);
return BitSetUtils.numberOfBitsToRepresent(QualityUtils.MAX_QUAL_SCORE);
}
}

View File

@ -0,0 +1,65 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.BitSet;
/**
 * The object temporarily held by a read that describes all of its covariates.
 *
 * Conceptually a transposed array of CovariateValues: covariates are added one at a time
 * (covariate-major), but keys are stored and queried per read position (position-major),
 * with a separate table for each event type (mismatches, insertions, deletions).
 *
 * @author Mauricio Carneiro
 * @since 2/8/12
 */
public class ReadCovariates {
    // [readPosition][covariateIndex] key tables, one per event type. final: allocated once per read.
    private final BitSet[][] mismatchesKeySet;
    private final BitSet[][] insertionsKeySet;
    private final BitSet[][] deletionsKeySet;
    private int nextCovariateIndex;                             // column to fill on the next addCovariate call

    public ReadCovariates(int readLength, int numberOfCovariates) {
        this.mismatchesKeySet = new BitSet[readLength][numberOfCovariates];
        this.insertionsKeySet = new BitSet[readLength][numberOfCovariates];
        this.deletionsKeySet = new BitSet[readLength][numberOfCovariates];
        this.nextCovariateIndex = 0;
    }

    /**
     * Adds the next covariate's values for all read positions, filling one column of each table.
     *
     * @param covariate the per-position values of this covariate for all three event types
     */
    public void addCovariate(CovariateValues covariate) {
        transposeCovariateValues(mismatchesKeySet, covariate.getMismatches());
        transposeCovariateValues(insertionsKeySet, covariate.getInsertions());
        transposeCovariateValues(deletionsKeySet, covariate.getDeletions());
        nextCovariateIndex++;
    }

    /**
     * Selects the covariate key array for a read position, dispatched on the error model.
     *
     * @param readPosition the offset in the read whose keys are requested
     * @param errorModel   which event type's key table to read from
     * @return the BitSet keys for that position under the requested error model
     * @throws ReviewedStingException if the error model is not one of the three known types
     */
    public BitSet[] getKeySet(final int readPosition, final EventType errorModel) {
        switch (errorModel) {
            case BASE_SUBSTITUTION:
                return getMismatchesKeySet(readPosition);
            case BASE_INSERTION:
                return getInsertionsKeySet(readPosition);
            case BASE_DELETION:
                return getDeletionsKeySet(readPosition);
            default:
                throw new ReviewedStingException("Unrecognized Base Recalibration type: " + errorModel);
        }
    }

    // Returns the mismatch (base substitution) covariate keys for the given read position.
    public BitSet[] getMismatchesKeySet(int readPosition) {
        return mismatchesKeySet[readPosition];
    }

    // Returns the insertion covariate keys for the given read position.
    public BitSet[] getInsertionsKeySet(int readPosition) {
        return insertionsKeySet[readPosition];
    }

    // Returns the deletion covariate keys for the given read position.
    public BitSet[] getDeletionsKeySet(int readPosition) {
        return deletionsKeySet[readPosition];
    }

    // Scatters one covariate's per-position values into column nextCovariateIndex of the table.
    private void transposeCovariateValues(BitSet[][] keySet, BitSet[] covariateValues) {
        for (int i = 0; i < covariateValues.length; i++)
            keySet[i][nextCovariateIndex] = covariateValues[i];
    }
}

View File

@ -55,16 +55,7 @@ public class ReadGroupCovariate implements RequiredCovariate {
public CovariateValues getValues(final GATKSAMRecord read) {
final int l = read.getReadLength();
final String readGroupId = read.getReadGroup().getReadGroupId();
short shortId;
if (readGroupLookupTable.containsKey(readGroupId))
shortId = readGroupLookupTable.get(readGroupId);
else {
shortId = nextId;
readGroupLookupTable.put(readGroupId, nextId);
readGroupReverseLookupTable.put(nextId, readGroupId);
nextId++;
}
BitSet rg = BitSetUtils.bitSetFrom(shortId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
BitSet rg = bitSetForReadGroup(readGroupId); // All objects must output a BitSet, so we convert the "compressed" representation of the Read Group into a bitset
BitSet[] readGroups = new BitSet[l];
Arrays.fill(readGroups, rg);
return new CovariateValues(readGroups, readGroups, readGroups);
@ -81,6 +72,11 @@ public class ReadGroupCovariate implements RequiredCovariate {
return decodeReadGroup((short) BitSetUtils.longFrom(key));
}
@Override
public BitSet bitSetFromKey(Object key) {
return bitSetForReadGroup((String) key);
}
public final String decodeReadGroup(final short id) {
return readGroupReverseLookupTable.get(id);
}
@ -89,6 +85,19 @@ public class ReadGroupCovariate implements RequiredCovariate {
public int numberOfBits() {
return BitSetUtils.numberOfBitsToRepresent(Short.MAX_VALUE);
}
/**
 * Returns the BitSet encoding of a read group's compact short id, lazily assigning a
 * new id (and registering it in both lookup directions) the first time a group is seen.
 *
 * Single map lookup replaces the containsKey/get double lookup; the lookup table never
 * stores null values (ids are assigned here), so a null result means "not yet seen".
 *
 * @param readGroupId the read group's string id from the SAM header
 * @return the BitSet form of the read group's compact id
 */
private BitSet bitSetForReadGroup(String readGroupId) {
    Short shortId = readGroupLookupTable.get(readGroupId);
    if (shortId == null) {
        shortId = nextId;
        readGroupLookupTable.put(readGroupId, nextId);
        readGroupReverseLookupTable.put(nextId, readGroupId);
        nextId++;
    }
    return BitSetUtils.bitSetFrom(shortId);
}
}

View File

@ -53,50 +53,18 @@ import java.util.Map;
*/
public class RecalDataManager {
public final NestedHashMap nestedHashMap; // The full dataset
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedReadGroup; // Table where everything except read group has been collapsed
private final HashMap<BaseRecalibrationType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
private final HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
public final NestedHashMap nestedHashMap; // The full dataset
private final HashMap<EventType, NestedHashMap> dataCollapsedReadGroup; // Table where everything except read group has been collapsed
private final HashMap<EventType, NestedHashMap> dataCollapsedQualityScore; // Table where everything except read group and quality score has been collapsed
private final HashMap<EventType, ArrayList<NestedHashMap>> dataCollapsedByCovariate; // Tables where everything except read group, quality score, and given covariate has been collapsed
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
public final static String ORIGINAL_QUAL_ATTRIBUTE_TAG = "OQ"; // The tag that holds the original quality scores
public final static String COLOR_SPACE_QUAL_ATTRIBUTE_TAG = "CQ"; // The tag that holds the color space quality scores for SOLID bams
public final static String COLOR_SPACE_ATTRIBUTE_TAG = "CS"; // The tag that holds the color space for SOLID bams
public final static String COLOR_SPACE_INCONSISTENCY_TAG = "ZC"; // A new tag made up for the recalibrator which will hold an array of ints which say if this base is inconsistent with its color
private static boolean warnUserNullPlatform = false;
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
public enum BaseRecalibrationType {
BASE_SUBSTITUTION(0, "M"),
BASE_INSERTION(1, "I"),
BASE_DELETION(2, "D");
public int index;
public String representation;
private BaseRecalibrationType(int index, String representation) {
this.index = index;
this.representation = representation;
}
public static BaseRecalibrationType eventFrom(int index) {
switch (index) {
case 0:
return BASE_SUBSTITUTION;
case 1:
return BASE_INSERTION;
case 2:
return BASE_DELETION;
default:
throw new ReviewedStingException(String.format("Event %d does not exist.", index));
}
}
@Override
public String toString() {
return representation;
}
}
private static final String COVARS_ATTRIBUTE = "COVARS"; // used to store covariates array as a temporary attribute inside GATKSAMRecord.\
public enum SOLID_RECAL_MODE {
/**
@ -142,10 +110,10 @@ public class RecalDataManager {
public RecalDataManager(final boolean createCollapsedTables, final int numCovariates) {
if (createCollapsedTables) { // Initialize all the collapsed tables, only used by on-the-fly recalibration
nestedHashMap = null;
dataCollapsedReadGroup = new HashMap<BaseRecalibrationType, NestedHashMap>();
dataCollapsedQualityScore = new HashMap<BaseRecalibrationType, NestedHashMap>();
dataCollapsedByCovariate = new HashMap<BaseRecalibrationType, ArrayList<NestedHashMap>>();
for (final BaseRecalibrationType errorModel : BaseRecalibrationType.values()) {
dataCollapsedReadGroup = new HashMap<EventType, NestedHashMap>();
dataCollapsedQualityScore = new HashMap<EventType, NestedHashMap>();
dataCollapsedByCovariate = new HashMap<EventType, ArrayList<NestedHashMap>>();
for (final EventType errorModel : EventType.values()) {
dataCollapsedReadGroup.put(errorModel, new NestedHashMap());
dataCollapsedQualityScore.put(errorModel, new NestedHashMap());
dataCollapsedByCovariate.put(errorModel, new ArrayList<NestedHashMap>());
@ -162,100 +130,10 @@ public class RecalDataManager {
}
}
public static CovariateKeySet covariateKeySetFrom(GATKSAMRecord read) {
return (CovariateKeySet) read.getTemporaryAttribute(COVARS_ATTRIBUTE);
public static ReadCovariates covariateKeySetFrom(GATKSAMRecord read) {
return (ReadCovariates) read.getTemporaryAttribute(COVARS_ATTRIBUTE);
}
/**
* Add the given mapping to all of the collapsed hash tables
*
* @param key The list of comparables that is the key for this mapping
* @param fullDatum The RecalDatum which is the data for this mapping
* @param PRESERVE_QSCORES_LESS_THAN The threshold in report quality for adding to the aggregate collapsed table
*/
public final void addToAllTables(final Object[] key, final RecalDatum fullDatum, final int PRESERVE_QSCORES_LESS_THAN, final BaseRecalibrationType errorModel) {
// The full dataset isn't actually ever used for anything because of the sequential calculation so no need to keep the full data HashMap around
//data.put(key, thisDatum); // add the mapping to the main table
final int qualityScore = Integer.parseInt(key[1].toString());
final Object[] readGroupCollapsedKey = new Object[1];
final Object[] qualityScoreCollapsedKey = new Object[2];
final Object[] covariateCollapsedKey = new Object[3];
RecalDatum collapsedDatum;
// Create dataCollapsedReadGroup, the table where everything except read group has been collapsed
if (qualityScore >= PRESERVE_QSCORES_LESS_THAN) {
readGroupCollapsedKey[0] = key[0]; // Make a new key with just the read group
collapsedDatum = (RecalDatum) dataCollapsedReadGroup.get(errorModel).get(readGroupCollapsedKey);
if (collapsedDatum == null) {
dataCollapsedReadGroup.get(errorModel).put(new RecalDatum(fullDatum), readGroupCollapsedKey);
}
else {
collapsedDatum.combine(fullDatum); // using combine instead of increment in order to calculate overall aggregateQReported
}
}
// Create dataCollapsedQuality, the table where everything except read group and quality score has been collapsed
qualityScoreCollapsedKey[0] = key[0]; // Make a new key with the read group ...
qualityScoreCollapsedKey[1] = key[1]; // and quality score
collapsedDatum = (RecalDatum) dataCollapsedQualityScore.get(errorModel).get(qualityScoreCollapsedKey);
if (collapsedDatum == null) {
dataCollapsedQualityScore.get(errorModel).put(new RecalDatum(fullDatum), qualityScoreCollapsedKey);
}
else {
collapsedDatum.increment(fullDatum);
}
// Create dataCollapsedByCovariate's, the tables where everything except read group, quality score, and given covariate has been collapsed
for (int iii = 0; iii < dataCollapsedByCovariate.get(errorModel).size(); iii++) {
covariateCollapsedKey[0] = key[0]; // Make a new key with the read group ...
covariateCollapsedKey[1] = key[1]; // and quality score ...
final Object theCovariateElement = key[iii + 2]; // and the given covariate
if (theCovariateElement != null) {
covariateCollapsedKey[2] = theCovariateElement;
collapsedDatum = (RecalDatum) dataCollapsedByCovariate.get(errorModel).get(iii).get(covariateCollapsedKey);
if (collapsedDatum == null) {
dataCollapsedByCovariate.get(errorModel).get(iii).put(new RecalDatum(fullDatum), covariateCollapsedKey);
}
else {
collapsedDatum.increment(fullDatum);
}
}
}
}
/**
* Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score
* that will be used in the sequential calculation in TableRecalibrationWalker
*
* @param smoothing The smoothing parameter that goes into empirical quality score calculation
* @param maxQual At which value to cap the quality scores
*/
public final void generateEmpiricalQualities(final int smoothing, final int maxQual) {
for (final BaseRecalibrationType errorModel : BaseRecalibrationType.values()) {
recursivelyGenerateEmpiricalQualities(dataCollapsedReadGroup.get(errorModel).data, smoothing, maxQual);
recursivelyGenerateEmpiricalQualities(dataCollapsedQualityScore.get(errorModel).data, smoothing, maxQual);
for (NestedHashMap map : dataCollapsedByCovariate.get(errorModel)) {
recursivelyGenerateEmpiricalQualities(map.data, smoothing, maxQual);
checkForSingletons(map.data);
}
}
}
private void recursivelyGenerateEmpiricalQualities(final Map data, final int smoothing, final int maxQual) {
for (Object comp : data.keySet()) {
final Object val = data.get(comp);
if (val instanceof RecalDatum) { // We are at the end of the nested hash maps
((RecalDatum) val).calcCombinedEmpiricalQuality(smoothing, maxQual);
}
else { // Another layer in the nested hash map
recursivelyGenerateEmpiricalQualities((Map) val, smoothing, maxQual);
}
}
}
private void checkForSingletons(final Map data) {
// todo -- this looks like it's better just as a data.valueSet() call?
@ -279,7 +157,7 @@ public class RecalDataManager {
* @param covariate Which covariate indexes the desired collapsed HashMap
* @return The desired collapsed HashMap
*/
public final NestedHashMap getCollapsedTable(final int covariate, final BaseRecalibrationType errorModel) {
public final NestedHashMap getCollapsedTable(final int covariate, final EventType errorModel) {
if (covariate == 0) {
return dataCollapsedReadGroup.get(errorModel); // Table where everything except read group has been collapsed
}
@ -652,13 +530,13 @@ public class RecalDataManager {
public static void computeCovariates(final GATKSAMRecord read, final List<Covariate> requestedCovariates) {
final int numRequestedCovariates = requestedCovariates.size();
final int readLength = read.getReadLength();
final CovariateKeySet covariateKeySet = new CovariateKeySet(readLength, numRequestedCovariates);
final ReadCovariates readCovariates = new ReadCovariates(readLength, numRequestedCovariates);
// Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
for (Covariate covariate : requestedCovariates)
covariateKeySet.addCovariate(covariate.getValues(read));
readCovariates.addCovariate(covariate.getValues(read));
read.setTemporaryAttribute(COVARS_ATTRIBUTE, covariateKeySet);
read.setTemporaryAttribute(COVARS_ATTRIBUTE, readCovariates);
}
/**

View File

@ -142,6 +142,11 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
public byte DELETIONS_DEFAULT_QUALITY = 45;
@Argument(fullName = "low_quality_tail", shortName = "lqt", doc = "minimum quality for the bases in the tail of the reads to be considered", required = false)
public byte LOW_QUAL_TAIL = 2;
@Hidden
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
public String DEFAULT_PLATFORM = null;
@ -149,4 +154,5 @@ public class RecalibrationArgumentCollection {
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
public String FORCE_PLATFORM = null;
}

View File

@ -30,7 +30,10 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException;
@ -203,7 +206,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
public class BAQedPileupElement extends PileupElement {
public BAQedPileupElement( final PileupElement PE ) {
super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletion(), PE.isBeforeInsertion(), PE.isNextToSoftClip());
super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeDeletion(), PE.isAfterDeletion(), PE.isBeforeInsertion(), PE.isAfterInsertion(), PE.isNextToSoftClip());
}
@Override

View File

@ -4,7 +4,7 @@ import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -320,8 +320,8 @@ public class ClippingOp {
byte[] newBaseDeletionQuals = new byte[newLength];
System.arraycopy(read.getBaseInsertionQualities(), copyStart, newBaseInsertionQuals, 0, newLength);
System.arraycopy(read.getBaseDeletionQualities(), copyStart, newBaseDeletionQuals, 0, newLength);
hardClippedRead.setBaseQualities(newBaseInsertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
hardClippedRead.setBaseQualities(newBaseDeletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
hardClippedRead.setBaseQualities(newBaseInsertionQuals, EventType.BASE_INSERTION);
hardClippedRead.setBaseQualities(newBaseDeletionQuals, EventType.BASE_DELETION);
}
return hardClippedRead;

View File

@ -231,15 +231,16 @@ public class ReadClipper {
/**
* Hard clips any contiguous tail (left, right or both) with base quality lower than lowQual.
* Clips any contiguous tail (left, right or both) with base quality lower than lowQual using the desired algorithm.
*
* This function will look for low quality tails and hard clip them away. A low quality tail
* ends when a base has base quality greater than lowQual.
*
* @param algorithm the algorithm to use (HardClip, SoftClip, Write N's,...)
* @param lowQual every base quality lower than or equal to this in the tail of the read will be hard clipped
* @return a new read without low quality tails
*/
private GATKSAMRecord hardClipLowQualEnds(byte lowQual) {
private GATKSAMRecord clipLowQualEnds(ClippingRepresentation algorithm, byte lowQual) {
if (read.isEmpty())
return read;
@ -254,7 +255,6 @@ public class ReadClipper {
// if the entire read should be clipped, then return an empty read.
if (leftClipIndex > rightClipIndex)
return GATKSAMRecord.emptyRead(read);
// return (new GATKSAMRecord(read.getHeader()));
if (rightClipIndex < read.getReadLength() - 1) {
this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1));
@ -262,11 +262,18 @@ public class ReadClipper {
if (leftClipIndex > 0 ) {
this.addOp(new ClippingOp(0, leftClipIndex - 1));
}
return this.clipRead(ClippingRepresentation.HARDCLIP_BASES);
return this.clipRead(algorithm);
}
private GATKSAMRecord hardClipLowQualEnds(byte lowQual) {
return this.clipLowQualEnds(ClippingRepresentation.HARDCLIP_BASES, lowQual);
}
public static GATKSAMRecord hardClipLowQualEnds(GATKSAMRecord read, byte lowQual) {
return (new ReadClipper(read)).hardClipLowQualEnds(lowQual);
}
public static GATKSAMRecord clipLowQualEnds(GATKSAMRecord read, byte lowQual, ClippingRepresentation algorithm) {
return (new ReadClipper(read)).clipLowQualEnds(algorithm, lowQual);
}
/**

View File

@ -4,7 +4,7 @@ import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -203,8 +203,8 @@ public class FragmentUtils {
insertionQuals[iii] = secondReadInsertionQuals[iii-firstReadStop];
deletionQuals[iii] = secondReadDeletionQuals[iii-firstReadStop];
}
returnRead.setBaseQualities( insertionQuals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION );
returnRead.setBaseQualities( deletionQuals, RecalDataManager.BaseRecalibrationType.BASE_DELETION );
returnRead.setBaseQualities( insertionQuals, EventType.BASE_INSERTION );
returnRead.setBaseQualities( deletionQuals, EventType.BASE_DELETION );
}
final ArrayList<GATKSAMRecord> returnList = new ArrayList<GATKSAMRecord>();

View File

@ -177,7 +177,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
for (int i = 0; i < reads.size(); i++) {
GATKSAMRecord read = reads.get(i);
int offset = offsets.get(i);
pileup.add(createNewPileupElement(read, offset, false, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
pileup.add(createNewPileupElement(read, offset, false, false, false, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
}
return pileup;
@ -196,7 +196,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
for (GATKSAMRecord read : reads) {
pileup.add(createNewPileupElement(read, offset, false, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
pileup.add(createNewPileupElement(read, offset, false, false, false, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
}
return pileup;
@ -204,8 +204,8 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
protected abstract AbstractReadBackedPileup<RBP, PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip);
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip, String nextEventBases, int nextEventLength );
protected abstract PE createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip);
protected abstract PE createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip, final String nextEventBases, final int nextEventLength );
// --------------------------------------------------------
//

View File

@ -48,7 +48,7 @@ public class ExtendedEventPileupElement extends PileupElement {
public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) {
super(read, offset, type == Type.DELETION, false, false, false,null,-1); // extended events are slated for removal
super(read, offset, type == Type.DELETION, false, false, false, false, false, null, -1); // extended events are slated for removal
this.read = read;
this.offset = offset;
this.eventLength = eventLength;

View File

@ -21,15 +21,17 @@ public class PileupElement implements Comparable<PileupElement> {
public static final byte T_FOLLOWED_BY_INSERTION_BASE = (byte) 89;
public static final byte G_FOLLOWED_BY_INSERTION_BASE = (byte) 90;
protected final GATKSAMRecord read;
protected final int offset;
protected final boolean isDeletion;
protected final boolean isBeforeDeletion;
protected final boolean isBeforeInsertion;
protected final boolean isNextToSoftClip;
protected final int eventLength;
protected final String eventBases; // if it is a deletion, we do not have information about the actual deleted bases
// in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases
protected final GATKSAMRecord read; // the read this base belongs to
protected final int offset; // the offset in the bases array for this base
protected final boolean isDeletion; // is this base a deletion
protected final boolean isBeforeDeletion; // is the base to the right of this base an deletion
protected final boolean isAfterDeletion; // is the base to the left of this base a deletion
protected final boolean isBeforeInsertion; // is the base to the right of this base an insertion
protected final boolean isAfterInsertion; // is the base to the left of this base an insertion
protected final boolean isNextToSoftClip; // is this base either before or after a soft clipped base
protected final int eventLength; // what is the length of the event (insertion or deletion) *after* this base
protected final String eventBases; // if it is a deletion, we do not have information about the actual deleted bases in the read itself, so we fill the string with D's; for insertions we keep actual inserted bases
/**
@ -39,7 +41,9 @@ public class PileupElement implements Comparable<PileupElement> {
* @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
* @param isDeletion whether or not this base is a deletion
* @param isBeforeDeletion whether or not this base is before a deletion
* @param isAfterDeletion whether or not this base is after a deletion
* @param isBeforeInsertion whether or not this base is before an insertion
* @param isAfterInsertion whether or not this base is after an insertion
* @param isNextToSoftClip whether or not this base is next to a soft clipped base
* @param nextEventBases bases in event in case element comes before insertion or deletion
* @param nextEventLength length of next event in case it's insertion or deletion
@ -48,8 +52,7 @@ public class PileupElement implements Comparable<PileupElement> {
"read != null",
"offset >= -1",
"offset <= read.getReadLength()"})
public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isBeforeInsertion, final boolean isNextToSoftClip,
final String nextEventBases, final int nextEventLength) {
public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip, final String nextEventBases, final int nextEventLength) {
if (offset < 0 && isDeletion)
throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset");
@ -57,20 +60,22 @@ public class PileupElement implements Comparable<PileupElement> {
this.offset = offset;
this.isDeletion = isDeletion;
this.isBeforeDeletion = isBeforeDeletion;
this.isAfterDeletion = isAfterDeletion;
this.isBeforeInsertion = isBeforeInsertion;
this.isAfterInsertion = isAfterInsertion;
this.isNextToSoftClip = isNextToSoftClip;
if (isBeforeInsertion)
eventBases = nextEventBases;
else
eventBases = null; // ignore argument in any other case
eventBases = null; // ignore argument in any other case
if (isBeforeDeletion || isBeforeInsertion)
eventLength = nextEventLength;
else
eventLength = -1;
}
public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isBeforeInsertion, final boolean isNextToSoftClip) {
this(read,offset, isDeletion, isBeforeDeletion, isBeforeInsertion, isNextToSoftClip, null, -1);
public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip) {
this(read,offset, isDeletion, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, -1);
}
public boolean isDeletion() {
return isDeletion;
@ -80,10 +85,18 @@ public class PileupElement implements Comparable<PileupElement> {
return isBeforeDeletion;
}
public boolean isAfterDeletion() {
return isAfterDeletion;
}
public boolean isBeforeInsertion() {
return isBeforeInsertion;
}
public boolean isAfterInsertion() {
return isAfterInsertion;
}
public boolean isNextToSoftClip() {
return isNextToSoftClip;
}
@ -123,14 +136,14 @@ public class PileupElement implements Comparable<PileupElement> {
}
/**
* Returns length of the event (number of inserted or deleted bases
* @return length of the event (number of inserted or deleted bases
*/
public int getEventLength() {
return eventLength;
}
/**
* Returns actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read.
* @return actual sequence of inserted bases, or a null if the event is a deletion or if there is no event in the associated read.
*/
public String getEventBases() {
return eventBases;
@ -185,13 +198,9 @@ public class PileupElement implements Comparable<PileupElement> {
//
// --------------------------------------------------------------------------
// public boolean isReducedRead() {
// return read.isReducedRead();
// }
/**
* Returns the number of elements in the pileup element.
* <p/>
*
* Unless this is a reduced read, the number of elements in a pileup element is one. In the event of
* this being a reduced read and a deletion, we return the average number of elements between the left
* and right elements to the deletion. We assume the deletion to be left aligned.

View File

@ -96,12 +96,11 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
}
@Override
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion, boolean isNextToSoftClip) {
protected ExtendedEventPileupElement createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip) {
throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
}
@Override
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion,
boolean isNextToSoftClip,String nextEventBases, int nextEventLength) {
protected ExtendedEventPileupElement createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip, final String nextEventBases, final int nextEventLength ) {
throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
}

View File

@ -56,6 +56,9 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
*
* @param loc
* @param pileup
* @param size
* @param nDeletions
* @param nMQ0Reads
*/
public ReadBackedPileupImpl(GenomeLoc loc, List<PileupElement> pileup, int size, int nDeletions, int nMQ0Reads) {
super(loc, pileup, size, nDeletions, nMQ0Reads);
@ -71,13 +74,14 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
}
@Override
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion,
boolean isNextToSoftClip) {
return new PileupElement(read, offset, isDeletion, isBeforeDeletion, isBeforeInsertion, isNextToSoftClip, null,0);
protected PileupElement createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip) {
return new PileupElement(read, offset, isDeletion, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, 0);
}
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeDeletion, boolean isBeforeInsertion,
boolean isNextToSoftClip,String nextEventBases, final int nextEventLength) {
return new PileupElement(read, offset, isDeletion, isBeforeDeletion, isBeforeInsertion, isNextToSoftClip, nextEventBases,nextEventLength);
@Override
protected PileupElement createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip, final String nextEventBases, final int nextEventLength ) {
return new PileupElement(read, offset, isDeletion, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, nextEventBases, nextEventLength);
}
}

View File

@ -26,10 +26,11 @@
package org.broadinstitute.sting.utils.recalibration;
import org.broadinstitute.sting.gatk.walkers.bqsr.*;
import org.broadinstitute.sting.utils.BitSetUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.NestedHashMap;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines;
@ -37,7 +38,8 @@ import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Pattern;
@ -50,82 +52,96 @@ import java.util.regex.Pattern;
public class BaseRecalibration {
private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
public static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
public static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
private ArrayList<HashMap<BitSet, RecalDatum>> collapsedHashes = new ArrayList<HashMap<BitSet, RecalDatum>> (); // All the collapsed data tables
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of all covariates to be used in this calculation
private final ArrayList<Covariate> requiredCovariates = new ArrayList<Covariate>(); // List of required covariates to be used in this calculation
private final ArrayList<Covariate> optionalCovariates = new ArrayList<Covariate>(); // List of optional covariates to be used in this calculation
public static final Pattern REQUIRED_COVARIATE_PATTERN = Pattern.compile("^# Required Covariates.*");
public static final Pattern OPTIONAL_COVARIATE_PATTERN = Pattern.compile("^# Optional Covariates.*");
public static final String EOF_MARKER = "EOF";
private static final int MAX_QUALITY_SCORE = 65; //BUGBUG: what value to use here?
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(...) for all sets of covariate values.
private static final byte SMOOTHING_CONSTANT = 1;
ArrayList<BQSRKeyManager> keyManagers = new ArrayList<BQSRKeyManager>();
public BaseRecalibration(final File RECAL_FILE) {
// Get a list of all available covariates
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // todo -- initialize with the parameters from the csv file!
int lineNumber = 0;
boolean foundAllCovariates = false;
boolean foundRequiredCovariates = false;
boolean foundOptionalCovariates = false;
boolean initializedKeyManagers = false;
// Read in the data from the csv file and populate the data map and covariates list
boolean sawEOF = false;
try {
for (String line : new XReadLines(RECAL_FILE)) {
lineNumber++;
if (EOF_MARKER.equals(line)) {
sawEOF = true;
}
else if (COMMENT_PATTERN.matcher(line).matches()) {
; // Skip over the comment lines, (which start with '#')
}
// Read in the covariates that were used from the input file
else if (COVARIATE_PATTERN.matcher(line).matches()) { // The line string is either specifying a covariate or is giving csv data
if (foundAllCovariates) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE);
}
else { // Found the covariate list in input file, loop through all of them and instantiate them
String[] vals = line.split(",");
for (int iii = 0; iii < vals.length - 4; iii++) { // There are n-4 covariates. The last four items are ErrorModel, nObservations, nMismatch, and Qempirical
boolean foundClass = false;
for (Class<?> covClass : classes) {
if ((vals[iii] + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) {
foundClass = true;
try {
Covariate covariate = (Covariate) covClass.newInstance();
requestedCovariates.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
sawEOF = EOF_MARKER.equals(line);
if (sawEOF)
break;
boolean requiredCovariatesLine = REQUIRED_COVARIATE_PATTERN.matcher(line).matches();
boolean optionalCovariatesLine = OPTIONAL_COVARIATE_PATTERN.matcher(line).matches();
if (requiredCovariatesLine && foundRequiredCovariates)
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Duplicate required covariates line");
if (optionalCovariatesLine && foundOptionalCovariates)
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Duplicate optional covariates line");
if (optionalCovariatesLine && !foundRequiredCovariates)
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Optional covariates reported before Required covariates");
if (requiredCovariatesLine || optionalCovariatesLine) {
String [] covariateNames = line.split(": ")[1].split(","); // take the second half of the string (past the ":") and split it by "," to get the list of required covariates
List<Covariate> covariateList = requiredCovariatesLine ? requiredCovariates : optionalCovariates; // set the appropriate covariate list to update
for (String covariateName : covariateNames) {
boolean foundClass = false;
for (Class<?> covClass : classes) {
if ((covariateName + "Covariate").equalsIgnoreCase(covClass.getSimpleName())) {
foundClass = true;
try {
Covariate covariate = (Covariate) covClass.newInstance();
covariate.initialize(RAC);
requestedCovariates.add(covariate);
covariateList.add(covariate);
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
}
if (!foundClass) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option.");
}
}
if (!foundClass)
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (covariateName + "Covariate") + ") isn't a valid covariate option.");
}
foundRequiredCovariates = foundRequiredCovariates || requiredCovariatesLine;
foundOptionalCovariates = foundOptionalCovariates || optionalCovariatesLine;
}
}
else { // Found a line of data
if (!foundAllCovariates) {
foundAllCovariates = true;
else if (!line.startsWith("#")) { // if this is not a comment line that we don't care about, it is DATA!
if (!foundRequiredCovariates || !foundOptionalCovariates) // At this point all the covariates should have been found and initialized
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
// At this point all the covariates should have been found and initialized
if (requestedCovariates.size() < 2) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE);
if (!initializedKeyManagers) {
ArrayList<Covariate> emptyList = new ArrayList<Covariate>(0);
ArrayList<Covariate> requiredCovariatesUpToThis = new ArrayList<Covariate>(); // Initialize one key manager for each table of required covariate
for (Covariate covariate : requiredCovariates) { // Every required covariate table includes all preceding required covariates (e.g. RG ; RG,Q )
requiredCovariatesUpToThis.add(covariate);
keyManagers.add(new BQSRKeyManager(requiredCovariatesUpToThis, emptyList));
}
final boolean createCollapsedTables = true;
// Initialize any covariate member variables using the shared argument collection
RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
for (Covariate cov : requestedCovariates) {
cov.initialize(RAC);
}
// Initialize the data hashMaps
dataManager = new RecalDataManager(createCollapsedTables, requestedCovariates.size());
keyManagers.add(new BQSRKeyManager(requiredCovariates, optionalCovariates)); // One master key manager for the collapsed tables
initializedKeyManagers = true;
}
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
}
}
@ -140,67 +156,113 @@ public class BaseRecalibration {
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
}
if (dataManager == null) {
throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
}
dataManager.generateEmpiricalQualities(1, MAX_QUALITY_SCORE);
generateEmpiricalQualities(SMOOTHING_CONSTANT);
}
/**
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
*
* @param file The CSV file we read the line from (for exception throwing purposes)
* @param line A line of CSV data read from the recalibration table data file
*/
private void addCSVData(final File file, final String line) {
final String[] vals = line.split(",");
boolean hasOptionalCovariates = optionalCovariates.size() > 0; // Do we have optional covariates in this key?
int addOptionalCovariates = hasOptionalCovariates ? 2 : 0; // If we have optional covariates at all, add two to the size of the array (to acommodate the covariate and the id)
final Object[] key = new Object[requiredCovariates.size() + addOptionalCovariates + 1]; // Reserve enough space for the required covariates, optional covariate, id and eventType
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
if (vals.length != requestedCovariates.size() + 4) { // +4 because of ErrorModel, nObservations, nMismatch, and Qempirical
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
int indexCovariateValue = key.length - 3; // In the order of keys, the optional covariate comes right after the required covariates
int indexCovariateID = key.length - 2; // followed by the covariate ID
int indexEventType = key.length - 1; // and the event type
addKeysToArray(key, vals, requiredCovariates, 0); // Add the required covariates keys
if (hasOptionalCovariates) {
key[indexCovariateID] = Short.parseShort(vals[indexCovariateID]); // Add the optional covariate ID
Covariate covariate = optionalCovariates.get((Short) key[indexCovariateID]); // Get the covariate object for this ID
key[indexCovariateValue] = covariate.getValue(vals[indexCovariateValue]); // Add the optional covariate value, given the ID
}
key[indexEventType] = EventType.eventFrom(vals[indexEventType]); // Add the event type
final Object[] key = new Object[requestedCovariates.size()];
Covariate cov;
int iii;
for (iii = 0; iii < requestedCovariates.size(); iii++) {
cov = requestedCovariates.get(iii);
key[iii] = cov.getValue(vals[iii]);
int datumIndex = key.length; // The recal datum starts at the end of the key (after the event type)
long count = Long.parseLong(vals[datumIndex]); // Number of observations
long errors = Long.parseLong(vals[datumIndex + 1]); // Number of errors observed
double reportedQual = Double.parseDouble(vals[1]); // The reported Q score --> todo -- I don't like having the Q score hard coded in vals[1]. Generalize it!
final RecalDatum datum = new RecalDatum(count, errors, reportedQual, 0.0); // Create a new datum using the number of observations, number of mismatches, and reported quality score
addToAllTables(key, datum); // Add that datum to all the collapsed tables which will be used in the sequential calculation
}
/**
* Add the given mapping to all of the collapsed hash tables
*
* @param key The list of comparables that is the key for this mapping
* @param fullDatum The RecalDatum which is the data for this mapping
*/
private void addToAllTables(final Object[] key, final RecalDatum fullDatum) {
int nHashes = requiredCovariates.size(); // We will always need one hash per required covariate
if (optionalCovariates.size() > 0) // If we do have optional covariates
nHashes += 1; // we will need one extra hash table with the optional covariate encoded in the key set on top of the required covariates
for (int hashIndex = 0; hashIndex < nHashes; hashIndex++) {
HashMap<BitSet, RecalDatum> table; // object to hold the hash table we are going to manipulate
if (hashIndex >= collapsedHashes.size()) { // if we haven't yet created the collapsed hash table for this index, create it now!
table = new HashMap<BitSet, RecalDatum>();
collapsedHashes.add(table); // Because this is the only place where we add tables to the ArrayList, they will always be in the order we want.
}
else
table = collapsedHashes.get(hashIndex); // if the table has been previously created, just assign it to the "table" object for manipulation
int copyTo = hashIndex + 1; // this will copy the covariates up to the index of the one we are including now (1 for RG, 2 for QS,...)
if (copyTo > requiredCovariates.size()) // only in the case where we have optional covariates we need to increase the size of the array
copyTo = requiredCovariates.size() + 2; // if we have optional covarites, add the optional covariate and it's id to the size of the key
Object[] tableKey = new Object[copyTo + 1]; // create a new array that will hold as many keys as hashIndex (1 for RG hash, 2 for QualityScore hash, 3 for covariate hash plus the event type
System.arraycopy(key, 0, tableKey, 0, copyTo); // copy the keys for the corresponding covariates into the tableKey.
tableKey[tableKey.length-1] = key[key.length - 1]; // add the event type. The event type is always the last key, on both key sets.
BitSet hashKey = keyManagers.get(hashIndex).bitSetFromKey(tableKey); // Add bitset key with fullDatum to the appropriate hash
RecalDatum datum = table.get(hashKey);
if (datum == null)
datum = fullDatum;
else if (hashIndex == 0) // Special case for the ReadGroup covariate
datum.combine(fullDatum);
else
datum.increment(fullDatum);
table.put(hashKey, datum);
}
final String modelString = vals[iii++];
final RecalDataManager.BaseRecalibrationType errorModel = CovariateKeySet.errorModelFrom(modelString);
// Create a new datum using the number of observations, number of mismatches, and reported quality score
final RecalDatum datum = new RecalDatum(Long.parseLong(vals[iii]), Long.parseLong(vals[iii + 1]), Double.parseDouble(vals[1]), 0.0);
// Add that datum to all the collapsed tables which will be used in the sequential calculation
dataManager.addToAllTables(key, datum, QualityUtils.MIN_USABLE_Q_SCORE, errorModel); //BUGBUG: used to be Q5 now is Q6, probably doesn't matter
}
public void recalibrateRead(final GATKSAMRecord read) {
/**
* Loop over all the collapsed tables and turn the recalDatums found there into an empirical quality score
* that will be used in the sequential calculation in TableRecalibrationWalker
*
* @param smoothing The smoothing parameter that goes into empirical quality score calculation
*/
private void generateEmpiricalQualities(final int smoothing) {
for (final HashMap<BitSet, RecalDatum> table : collapsedHashes)
for (final RecalDatum datum : table.values())
datum.calcCombinedEmpiricalQuality(smoothing, QualityUtils.MAX_QUAL_SCORE);
}
public void recalibrateRead(final GATKSAMRecord read) {
//compute all covariate values for this read
RecalDataManager.computeCovariates(read, requestedCovariates);
final CovariateKeySet covariateKeySet = RecalDataManager.covariateKeySetFrom(read);
final ReadCovariates readCovariates = RecalDataManager.covariateKeySetFrom(read);
for (final RecalDataManager.BaseRecalibrationType errorModel : RecalDataManager.BaseRecalibrationType.values()) {
for (final EventType errorModel : EventType.values()) {
final byte[] originalQuals = read.getBaseQualities(errorModel);
final byte[] recalQuals = originalQuals.clone();
// For each base in the read
for (int offset = 0; offset < read.getReadLength(); offset++) {
final Object[] fullCovariateKeyWithErrorMode = covariateKeySet.getKeySet(offset, errorModel);
final Object[] fullCovariateKey = Arrays.copyOfRange(fullCovariateKeyWithErrorMode, 0, fullCovariateKeyWithErrorMode.length - 1); // need to strip off the error mode which was appended to the list of covariates
// BUGBUG: This caching seems to put the entire key set into memory which negates the benefits of storing the delta delta tables?
//Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKeyWithErrorMode);
//if( qualityScore == null ) {
final byte qualityScore = performSequentialQualityCalculation(errorModel, fullCovariateKey);
// qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKeyWithErrorMode);
//}
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel);
final byte qualityScore = performSequentialQualityCalculation(keySet, errorModel);
recalQuals[offset] = qualityScore;
}
@ -209,6 +271,8 @@ public class BaseRecalibration {
}
}
/**
* Implements a serial recalibration of the reads using the combinational table.
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
@ -221,20 +285,26 @@ public class BaseRecalibration {
* - The final shift equation is:
*
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
*
* todo -- I extremely dislike the way all this math is hardcoded... should rethink the data structures for this method in particular.
*
* @param key The list of Comparables that were calculated from the covariates
* @param errorModel the event type
* @return A recalibrated quality score as a byte
*/
private byte performSequentialQualityCalculation(final RecalDataManager.BaseRecalibrationType errorModel, final Object... key) {
final byte qualFromRead = (byte) Integer.parseInt(key[1].toString());
final Object[] readGroupCollapsedKey = new Object[1];
final Object[] qualityScoreCollapsedKey = new Object[2];
final Object[] covariateCollapsedKey = new Object[3];
private byte performSequentialQualityCalculation(BitSet[] key, EventType errorModel) {
final byte qualFromRead = (byte) BitSetUtils.shortFrom(key[1]);
final int readGroupKeyIndex = 0;
final int qualKeyIndex = 1;
final int covariatesKeyIndex = 2;
// The global quality shift (over the read group only)
readGroupCollapsedKey[0] = key[0];
final RecalDatum globalRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(0, errorModel).get(readGroupCollapsedKey));
List<BitSet> bitKeys = keyManagers.get(readGroupKeyIndex).bitSetsFromAllKeys(key, errorModel);
if (bitKeys.size() > 1)
throw new ReviewedStingException("There should only be one key for the RG collapsed table, something went wrong here");
final RecalDatum globalRecalDatum = collapsedHashes.get(readGroupKeyIndex).get(bitKeys.get(0));
double globalDeltaQ = 0.0;
if (globalRecalDatum != null) {
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
@ -243,9 +313,11 @@ public class BaseRecalibration {
}
// The shift in quality between reported and empirical
qualityScoreCollapsedKey[0] = key[0];
qualityScoreCollapsedKey[1] = key[1];
final RecalDatum qReportedRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(1, errorModel).get(qualityScoreCollapsedKey));
bitKeys = keyManagers.get(qualKeyIndex).bitSetsFromAllKeys(key, errorModel);
if (bitKeys.size() > 1)
throw new ReviewedStingException("There should only be one key for the Qual collapsed table, something went wrong here");
final RecalDatum qReportedRecalDatum = collapsedHashes.get(qualKeyIndex).get(bitKeys.get(0));
double deltaQReported = 0.0;
if (qReportedRecalDatum != null) {
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
@ -253,13 +325,11 @@ public class BaseRecalibration {
}
// The shift in quality due to each covariate by itself in turn
bitKeys = keyManagers.get(covariatesKeyIndex).bitSetsFromAllKeys(key, errorModel);
double deltaQCovariates = 0.0;
double deltaQCovariateEmpirical;
covariateCollapsedKey[0] = key[0];
covariateCollapsedKey[1] = key[1];
for (int iii = 2; iii < key.length; iii++) {
covariateCollapsedKey[2] = key[iii]; // The given covariate
final RecalDatum covariateRecalDatum = ((RecalDatum) dataManager.getCollapsedTable(iii, errorModel).get(covariateCollapsedKey));
for (BitSet k : bitKeys) {
final RecalDatum covariateRecalDatum = collapsedHashes.get(covariatesKeyIndex).get(k);
if (covariateRecalDatum != null) {
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
deltaQCovariates += (deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported));
@ -267,7 +337,7 @@ public class BaseRecalibration {
}
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
return QualityUtils.boundQual((int) Math.round(newQuality), (byte) MAX_QUALITY_SCORE);
return QualityUtils.boundQual((int) Math.round(newQuality), QualityUtils.MAX_QUAL_SCORE);
}
/**
@ -283,4 +353,19 @@ public class BaseRecalibration {
}
}
}
/**
* Shared functionality to add keys
*
* @param array the target array we are creating the keys in
* @param keys the actual keys we're using as a source
* @param covariateList the covariate list to loop through
* @param keyIndex the index in the keys and the arrays objects to run from
*/
private void addKeysToArray(final Object[] array, final String[] keys, List<Covariate> covariateList, int keyIndex) {
for (Covariate covariate : covariateList) {
array[keyIndex] = covariate.getValue(keys[keyIndex]);
keyIndex++;
}
}
}

View File

@ -361,10 +361,10 @@ public class ArtificialSAMUtils {
final GATKSAMRecord left = pair.get(0);
final GATKSAMRecord right = pair.get(1);
pileupElements.add(new PileupElement(left, pos - leftStart, false, false, false, false));
pileupElements.add(new PileupElement(left, pos - leftStart, false, false, false, false, false, false));
if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) {
pileupElements.add(new PileupElement(right, pos - rightStart, false, false, false, false));
pileupElements.add(new PileupElement(right, pos - rightStart, false, false, false, false, false, false));
}
}

View File

@ -25,7 +25,7 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.walkers.bqsr.RecalDataManager;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.NGSPlatform;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -165,7 +165,7 @@ public class GATKSAMRecord extends BAMRecord {
/**
* Setters and Accessors for base insertion and base deletion quality scores
*/
public void setBaseQualities( final byte[] quals, final RecalDataManager.BaseRecalibrationType errorModel ) {
public void setBaseQualities( final byte[] quals, final EventType errorModel ) {
switch( errorModel ) {
case BASE_SUBSTITUTION:
setBaseQualities(quals);
@ -181,7 +181,7 @@ public class GATKSAMRecord extends BAMRecord {
}
}
public byte[] getBaseQualities( final RecalDataManager.BaseRecalibrationType errorModel ) {
public byte[] getBaseQualities( final EventType errorModel ) {
switch( errorModel ) {
case BASE_SUBSTITUTION:
return getBaseQualities();
@ -204,7 +204,7 @@ public class GATKSAMRecord extends BAMRecord {
quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
setBaseQualities(quals, RecalDataManager.BaseRecalibrationType.BASE_INSERTION);
setBaseQualities(quals, EventType.BASE_INSERTION);
}
return quals;
}
@ -215,7 +215,7 @@ public class GATKSAMRecord extends BAMRecord {
quals = new byte[getBaseQualities().length];
Arrays.fill(quals, (byte) 45); // Some day in the future when base insertion and base deletion quals exist the samtools API will
// be updated and the original quals will be pulled here, but for now we assume the original quality is a flat Q45
setBaseQualities(quals, RecalDataManager.BaseRecalibrationType.BASE_DELETION);
setBaseQualities(quals, EventType.BASE_DELETION);
}
return quals;
}

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.gatk.walkers.bqsr;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.clipping.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -33,24 +35,22 @@ public class ContextCovariateUnitTest {
public void testSimpleContexts() {
byte[] quals = ReadUtils.createRandomReadQuals(10000);
byte[] bbases = ReadUtils.createRandomReadBases(10000, true);
String bases = stringFrom(bbases);
// System.out.println("Read: " + bases);
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bbases, quals, bbases.length + "M");
GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, RAC.LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
CovariateValues values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, bases);
verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, bases);
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, bases);
verifyCovariateArray(values.getMismatches(), RAC.MISMATCHES_CONTEXT_SIZE, stringFrom(clippedRead.getReadBases()));
verifyCovariateArray(values.getInsertions(), RAC.INSERTIONS_CONTEXT_SIZE, stringFrom(clippedRead.getReadBases()));
verifyCovariateArray(values.getDeletions(), RAC.DELETIONS_CONTEXT_SIZE, stringFrom(clippedRead.getReadBases()));
}
private void verifyCovariateArray(BitSet[] values, int contextSize, String bases) {
for (int i = 0; i < values.length; i++) {
String expectedContext = covariate.NO_CONTEXT_VALUE;
String expectedContext = null;
if (i >= contextSize) {
String context = bases.substring(i - contextSize, i);
if (!context.contains("N"))
expectedContext = context;
}
// System.out.println(String.format("Context [%d]:\n%s\n%s\n", i, covariate.keyFromBitSet(values[i]), expectedContext));
Assert.assertEquals(covariate.keyFromBitSet(values[i]), expectedContext);
}
}

View File

@ -45,15 +45,6 @@ public class CycleCovariateUnitTest {
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), readLength, (short) -1);
read.setReadPairedFlag(true);
read.setSecondOfPairFlag(true);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) -readLength, (short) 1);
read.setReadNegativeStrandFlag(false);
values = covariate.getValues(read);
verifyCovariateArray(values.getMismatches(), (short) -1, (short) -1);
}
private void verifyCovariateArray(BitSet[] values, short init, short increment) {

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.utils.recalibration;

import org.testng.annotations.Test;

import java.io.File;

/**
 * Unit tests for on-the-fly recalibration.
 *
 * @author Mauricio Carneiro
 * @since 3/16/12
 */
public class BaseRecalibrationUnitTest {

    /**
     * Smoke test for the CSV-backed recalibration report parser.
     *
     * The {@link BaseRecalibration} constructor performs all the parsing work,
     * so the assertion here is implicit: reaching the end of this method means
     * the example report was read without throwing. NOTE(review): once
     * BaseRecalibration exposes accessors for the parsed tables, this test
     * should assert on their contents rather than just successful construction.
     */
    @Test(enabled = true)
    public void testReadingCSV() {
        File csv = new File("public/testdata/exampleCSV.csv"); // checked-in example report
        BaseRecalibration baseRecalibration = new BaseRecalibration(csv); // throws on missing/malformed input
    }
}

View File

@ -42,8 +42,8 @@ public class GATKSAMRecordUnitTest extends BaseTest {
@Test
public void testReducedReadPileupElement() {
PileupElement readp = new PileupElement(read, 0, false, false, false, false);
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false, false);
PileupElement readp = new PileupElement(read, 0, false, false, false, false, false, false);
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false, false, false, false);
Assert.assertFalse(readp.getRead().isReducedRead());