Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e4d371dc80
|
|
@ -25,8 +25,11 @@ import java.util.*;
|
|||
* @since 3/6/12
|
||||
*/
|
||||
public class BQSRKeyManager {
|
||||
private final List<RequiredCovariateInfo> requiredCovariates;
|
||||
private final List<OptionalCovariateInfo> optionalCovariates;
|
||||
|
||||
private final List<Covariate> requiredCovariates;
|
||||
private final List<Covariate> optionalCovariates;
|
||||
private final List<RequiredCovariateInfo> requiredCovariatesInfo;
|
||||
private final List<OptionalCovariateInfo> optionalCovariatesInfo;
|
||||
private final Map<String, Short> covariateNameToIDMap;
|
||||
|
||||
private int nRequiredBits; // Number of bits used to represent the required covariates
|
||||
|
|
@ -44,15 +47,17 @@ public class BQSRKeyManager {
|
|||
* @param optionalCovariates the ordered list of optional covariates
|
||||
*/
|
||||
public BQSRKeyManager(List<Covariate> requiredCovariates, List<Covariate> optionalCovariates) {
|
||||
this.requiredCovariates = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
||||
this.optionalCovariates = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
||||
this.covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||
this.requiredCovariates = new ArrayList<Covariate>(requiredCovariates);
|
||||
this.optionalCovariates = new ArrayList<Covariate>(optionalCovariates);
|
||||
requiredCovariatesInfo = new ArrayList<RequiredCovariateInfo>(requiredCovariates.size()); // initialize the required covariates list
|
||||
optionalCovariatesInfo = new ArrayList<OptionalCovariateInfo>(optionalCovariates.size()); // initialize the optional covariates list (size may be 0, it's okay)
|
||||
covariateNameToIDMap = new HashMap<String, Short>(optionalCovariates.size()*2); // the map from covariate name to covariate id (when reading GATK Reports, we get the IDs as names of covariates)
|
||||
|
||||
nRequiredBits = 0;
|
||||
for (Covariate required : requiredCovariates) { // create a list of required covariates with the extra information for key management
|
||||
int nBits = required.numberOfBits(); // number of bits used by this covariate
|
||||
BitSet mask = genericMask(nRequiredBits, nBits); // create a mask for this covariate
|
||||
this.requiredCovariates.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||
requiredCovariatesInfo.add(new RequiredCovariateInfo(nRequiredBits, mask, required)); // Create an object for this required covariate
|
||||
nRequiredBits += nBits;
|
||||
}
|
||||
|
||||
|
|
@ -61,10 +66,10 @@ public class BQSRKeyManager {
|
|||
for (Covariate optional : optionalCovariates) {
|
||||
int nBits = optional.numberOfBits(); // number of bits used by this covariate
|
||||
nOptionalBits = Math.max(nOptionalBits, nBits); // optional covariates are represented by the number of bits needed by biggest covariate
|
||||
BitSet optionalID = BitSetUtils.bitSetFrom(id); // calculate the optional covariate ID for this covariate
|
||||
this.optionalCovariates.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
||||
BitSet optionalID = bitSetFromId(id); // calculate the optional covariate ID for this covariate
|
||||
optionalCovariatesInfo.add(new OptionalCovariateInfo(optionalID, optional)); // optional covariates have standardized mask and number of bits, so no need to store in the RequiredCovariateInfo object
|
||||
String covariateName = optional.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
this.covariateNameToIDMap.put(covariateName, id);
|
||||
covariateNameToIDMap.put(covariateName, id);
|
||||
id++;
|
||||
}
|
||||
|
||||
|
|
@ -93,17 +98,17 @@ public class BQSRKeyManager {
|
|||
* @return one key in bitset representation per covariate
|
||||
*/
|
||||
public List<BitSet> bitSetsFromAllKeys(BitSet[] allKeys, EventType eventType) {
|
||||
List<BitSet> allBitSets = new LinkedList<BitSet>(); // Generate one key per optional covariate
|
||||
List<BitSet> allBitSets = new ArrayList<BitSet>(); // Generate one key per optional covariate
|
||||
|
||||
BitSet eventBitSet = BitSetUtils.bitSetFrom(eventType.index); // create a bitset with the event type
|
||||
BitSet eventBitSet = bitSetFromEvent(eventType); // create a bitset with the event type
|
||||
int eventTypeBitIndex = nRequiredBits + nOptionalBits + nOptionalIDBits; // Location in the bit set to add the event type bits
|
||||
|
||||
int covariateIndex = 0;
|
||||
BitSet requiredKey = new BitSet(nRequiredBits); // This will be a bitset holding all the required keys, to replicate later on
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariates)
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo)
|
||||
addBitSetToKeyAtLocation(requiredKey, allKeys[covariateIndex++], infoRequired.bitsBefore); // Add all the required covariates to the key set
|
||||
|
||||
for (OptionalCovariateInfo infoOptional : optionalCovariates) {
|
||||
for (OptionalCovariateInfo infoOptional : optionalCovariatesInfo) {
|
||||
BitSet covariateKey = allKeys[covariateIndex++]; // get the bitset from all keys
|
||||
if (covariateKey == null)
|
||||
continue; // do not add nulls to the final set of keys.
|
||||
|
|
@ -116,7 +121,7 @@ public class BQSRKeyManager {
|
|||
allBitSets.add(optionalKey); // add this key to the list of keys
|
||||
}
|
||||
|
||||
if (optionalCovariates.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
||||
if (optionalCovariatesInfo.size() == 0) { // special case when we have no optional covariates, add the event type to the required key (our only key)
|
||||
addBitSetToKeyAtLocation(requiredKey, eventBitSet, eventTypeBitIndex); // Add the event type
|
||||
allBitSets.add(requiredKey); // add this key to the list of keys
|
||||
}
|
||||
|
|
@ -140,16 +145,16 @@ public class BQSRKeyManager {
|
|||
BitSet bitSetKey = new BitSet(totalNumberOfBits);
|
||||
|
||||
int requiredCovariate = 0;
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariates) {
|
||||
for (RequiredCovariateInfo infoRequired : requiredCovariatesInfo) {
|
||||
BitSet covariateBitSet = infoRequired.covariate.bitSetFromKey(key[requiredCovariate++]); // create a bitset from the object key provided using the required covariate's interface
|
||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, infoRequired.bitsBefore); // add it to the bitset key
|
||||
}
|
||||
|
||||
if (optionalCovariates.size() > 0) {
|
||||
int optionalCovariate = requiredCovariates.size(); // the optional covariate index in the key array
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
int optionalCovariate = requiredCovariatesInfo.size(); // the optional covariate index in the key array
|
||||
int covariateIDIndex = optionalCovariate + 1; // the optional covariate ID index is right after the optional covariate's
|
||||
int covariateID = parseCovariateID(key[covariateIDIndex]); // when reading the GATK Report the ID may come in a String instead of an index
|
||||
OptionalCovariateInfo infoOptional = optionalCovariates.get(covariateID); // so we can get the optional covariate information
|
||||
OptionalCovariateInfo infoOptional = optionalCovariatesInfo.get(covariateID); // so we can get the optional covariate information
|
||||
|
||||
BitSet covariateBitSet = infoOptional.covariate.bitSetFromKey(key[optionalCovariate]); // convert the optional covariate key into a bitset using the covariate's interface
|
||||
addBitSetToKeyAtLocation(bitSetKey, covariateBitSet, nRequiredBits); // add the optional covariate right after the required covariates
|
||||
|
|
@ -185,16 +190,16 @@ public class BQSRKeyManager {
|
|||
*/
|
||||
public List<Object> keySetFrom(BitSet key) {
|
||||
List<Object> objectKeys = new ArrayList<Object>();
|
||||
for (RequiredCovariateInfo info : requiredCovariates) {
|
||||
for (RequiredCovariateInfo info : requiredCovariatesInfo) {
|
||||
BitSet covariateBitSet = extractBitSetFromKey(key, info.mask, info.bitsBefore); // get the covariate's bitset
|
||||
objectKeys.add(info.covariate.keyFromBitSet(covariateBitSet)); // convert the bitset to object using covariate's interface
|
||||
}
|
||||
|
||||
if (optionalCovariates.size() > 0) {
|
||||
if (optionalCovariatesInfo.size() > 0) {
|
||||
BitSet covBitSet = extractBitSetFromKey(key, optionalCovariateMask, nRequiredBits); // mask out the covariate bit set
|
||||
BitSet idbs = extractBitSetFromKey(key, optionalCovariateIDMask, nRequiredBits + nOptionalBits); // mask out the covariate order (to identify which covariate this is)
|
||||
short id = BitSetUtils.shortFrom(idbs); // covert the id bitset into a short
|
||||
Covariate covariate = optionalCovariates.get(id).covariate; // get the corresponding optional covariate object
|
||||
Covariate covariate = optionalCovariatesInfo.get(id).covariate; // get the corresponding optional covariate object
|
||||
objectKeys.add(covariate.keyFromBitSet(covBitSet)); // add the optional covariate to the key set
|
||||
objectKeys.add(covariate.getClass().getSimpleName().split("Covariate")[0]); // add the covariate name using the id
|
||||
}
|
||||
|
|
@ -204,17 +209,11 @@ public class BQSRKeyManager {
|
|||
}
|
||||
|
||||
public List<Covariate> getRequiredCovariates() {
|
||||
ArrayList<Covariate> list = new ArrayList<Covariate>(requiredCovariates.size());
|
||||
for (RequiredCovariateInfo info : requiredCovariates)
|
||||
list.add(info.covariate);
|
||||
return list;
|
||||
return requiredCovariates;
|
||||
}
|
||||
|
||||
public List<Covariate> getOptionalCovariates() {
|
||||
ArrayList<Covariate> list = new ArrayList<Covariate>(optionalCovariates.size());
|
||||
for (OptionalCovariateInfo info : optionalCovariates)
|
||||
list.add(info.covariate);
|
||||
return list;
|
||||
return optionalCovariates;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -258,9 +257,20 @@ public class BQSRKeyManager {
|
|||
eventKey.set(i - firstBitIndex);
|
||||
return EventType.eventFrom(BitSetUtils.shortFrom(eventKey));
|
||||
}
|
||||
|
||||
private BitSet bitSetFromEvent(EventType eventType) {
|
||||
return BitSetUtils.bitSetFrom(eventType.index);
|
||||
|
||||
// cache the BitSet representing an event since it's otherwise created a massive amount of times
|
||||
private static final Map<EventType, BitSet> eventTypeCache = new HashMap<EventType, BitSet>(EventType.values().length);
|
||||
static {
|
||||
for (final EventType eventType : EventType.values())
|
||||
eventTypeCache.put(eventType, BitSetUtils.bitSetFrom(eventType.index));
|
||||
}
|
||||
|
||||
private BitSet bitSetFromEvent(final EventType eventType) {
|
||||
return eventTypeCache.get(eventType);
|
||||
}
|
||||
|
||||
private BitSet bitSetFromId(final short id) {
|
||||
return BitSetUtils.bitSetFrom(id);
|
||||
}
|
||||
|
||||
private int bitsInEventType() {
|
||||
|
|
@ -287,24 +297,24 @@ public class BQSRKeyManager {
|
|||
if (this == other)
|
||||
return true;
|
||||
|
||||
if (requiredCovariates.size() != other.requiredCovariates.size() || optionalCovariates.size() != other.optionalCovariates.size())
|
||||
if (requiredCovariatesInfo.size() != other.requiredCovariatesInfo.size() ||
|
||||
optionalCovariatesInfo.size() != other.optionalCovariatesInfo.size())
|
||||
return false;
|
||||
|
||||
Iterator<RequiredCovariateInfo> otherRequiredIterator = other.requiredCovariates.iterator();
|
||||
for (RequiredCovariateInfo thisInfo: requiredCovariates) {
|
||||
RequiredCovariateInfo otherInfo = otherRequiredIterator.next();
|
||||
|
||||
String thisName = thisInfo.covariate.getClass().getSimpleName();
|
||||
String otherName = otherInfo.covariate.getClass().getSimpleName();
|
||||
for (int i = 0; i < requiredCovariates.size(); i++) {
|
||||
Covariate myRequiredCovariate = requiredCovariates.get(i);
|
||||
Covariate otherRequiredCovariate = other.requiredCovariates.get(i);
|
||||
String thisName = myRequiredCovariate.getClass().getSimpleName();
|
||||
String otherName = otherRequiredCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
||||
Iterator<OptionalCovariateInfo> otherOptionalIterator = other.optionalCovariates.iterator();
|
||||
for (OptionalCovariateInfo thisInfo : optionalCovariates) {
|
||||
OptionalCovariateInfo otherInfo = otherOptionalIterator.next();
|
||||
String thisName = thisInfo.covariate.getClass().getSimpleName();
|
||||
String otherName = otherInfo.covariate.getClass().getSimpleName();
|
||||
for (int i = 0; i < optionalCovariates.size(); i++) {
|
||||
Covariate myOptionalCovariate = optionalCovariates.get(i);
|
||||
Covariate otherOptionalCovariate = other.optionalCovariates.get(i);
|
||||
String thisName = myOptionalCovariate.getClass().getSimpleName();
|
||||
String otherName = otherOptionalCovariate.getClass().getSimpleName();
|
||||
if (!thisName.equals(otherName))
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -126,8 +126,8 @@ public class ContextCovariate implements StandardCovariate {
|
|||
private BitSet contextWith(byte[] bases, int offset, int contextSize) {
|
||||
BitSet result = null;
|
||||
if (offset - contextSize + 1 >= 0) {
|
||||
String context = new String(Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1));
|
||||
if (!context.contains("N"))
|
||||
final byte[] context = Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1);
|
||||
if (!BaseUtils.containsBase(context, BaseUtils.N))
|
||||
result = BitSetUtils.bitSetFrom(context);
|
||||
}
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -139,7 +139,6 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// Outputs
|
||||
/////////////////////////////
|
||||
@Output(fullName="recal_file", shortName="recalFile", doc="The output recal file used by ApplyRecalibration", required=true)
|
||||
protected File recalFile = null;
|
||||
protected VariantContextWriter recalWriter = null;
|
||||
|
||||
@Output(fullName="tranches_file", shortName="tranchesFile", doc="The output tranches file used by ApplyRecalibration", required=true)
|
||||
|
|
@ -230,9 +229,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
throw new UserException.CommandLineException( "No truth set found! Please provide sets of known polymorphic loci marked with the truth=true ROD binding tag. For example, -B:hapmap,VCF,known=false,training=true,truth=true,prior=12.0 hapmapFile.vcf" );
|
||||
}
|
||||
|
||||
final VCFHeader vcfHeader = new VCFHeader();
|
||||
recalWriter = VariantContextWriterFactory.create(recalFile, getMasterSequenceDictionary());
|
||||
recalWriter.writeHeader(vcfHeader);
|
||||
recalWriter.writeHeader( new VCFHeader() );
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -101,6 +101,17 @@ public class BaseUtils {
|
|||
return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true iff the bases array contains at least one instance of base
|
||||
*/
|
||||
static public boolean containsBase(final byte[] bases, final byte base) {
|
||||
for ( final byte b : bases ) {
|
||||
if ( b == base )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a IUPAC nucleotide code to a pair of bases
|
||||
*
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.ObjectOutputStream;
|
||||
import java.util.BitSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Utilities for bitset conversion
|
||||
|
|
@ -71,8 +73,15 @@ public class BitSetUtils {
|
|||
* @return a bitset representation of the short
|
||||
*/
|
||||
public static BitSet bitSetFrom(short number) {
|
||||
return bitSetFrom(number, NBITS_SHORT_REPRESENTATION);
|
||||
BitSet result = shortCache.get(number);
|
||||
if (result == null) {
|
||||
result = bitSetFrom(number, NBITS_SHORT_REPRESENTATION);
|
||||
shortCache.put(number, result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
// use a static cache for shorts (but not for longs, because there could be a lot of entries)
|
||||
private static final Map<Short, BitSet> shortCache = new HashMap<Short, BitSet>(2 * Short.MAX_VALUE);
|
||||
|
||||
/**
|
||||
* Creates a BitSet representation of an arbitrary integer (number of bits capped at 64 -- long precision)
|
||||
|
|
@ -82,7 +91,7 @@ public class BitSetUtils {
|
|||
* @return a bitset representation of the integer
|
||||
*/
|
||||
public static BitSet bitSetFrom(long number, int nBits) {
|
||||
BitSet bitSet = new BitSet();
|
||||
BitSet bitSet = new BitSet(nBits);
|
||||
boolean isNegative = number < 0;
|
||||
int bitIndex = 0;
|
||||
while (number != 0) {
|
||||
|
|
@ -130,32 +139,32 @@ public class BitSetUtils {
|
|||
if (number < 0)
|
||||
throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||
number -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
final int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
|
||||
number -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
|
||||
|
||||
String dna = "";
|
||||
StringBuilder dna = new StringBuilder();
|
||||
while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
|
||||
byte base = (byte) (number % 4);
|
||||
switch (base) {
|
||||
case 0:
|
||||
dna = "A" + dna;
|
||||
dna.append('A');
|
||||
break;
|
||||
case 1:
|
||||
dna = "C" + dna;
|
||||
dna.append('C');
|
||||
break;
|
||||
case 2:
|
||||
dna = "G" + dna;
|
||||
dna.append('G');
|
||||
break;
|
||||
case 3:
|
||||
dna = "T" + dna;
|
||||
dna.append('T');
|
||||
break;
|
||||
}
|
||||
number /= 4;
|
||||
}
|
||||
for (int j = dna.length(); j < length; j++)
|
||||
dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)
|
||||
|
||||
return dna;
|
||||
return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -178,27 +187,18 @@ public class BitSetUtils {
|
|||
* @return the bitset representing the dna sequence
|
||||
*/
|
||||
public static BitSet bitSetFrom(String dna) {
|
||||
if (dna.length() > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length()));
|
||||
return bitSetFrom(dna.getBytes());
|
||||
}
|
||||
|
||||
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
|
||||
long preContext = combinationsFor(dna.length() - 1); // the sum of all combinations that preceded the length of the dna string
|
||||
for (int i = 0; i < dna.length(); i++) {
|
||||
public static BitSet bitSetFrom(final byte[] dna) {
|
||||
if (dna.length > MAX_DNA_CONTEXT)
|
||||
throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length));
|
||||
|
||||
final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string
|
||||
long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
|
||||
for (final byte base : dna) {
|
||||
baseTen *= 4;
|
||||
switch (dna.charAt(i)) {
|
||||
case 'A':
|
||||
baseTen += 0;
|
||||
break;
|
||||
case 'C':
|
||||
baseTen += 1;
|
||||
break;
|
||||
case 'G':
|
||||
baseTen += 2;
|
||||
break;
|
||||
case 'T':
|
||||
baseTen += 3;
|
||||
break;
|
||||
}
|
||||
baseTen += BaseUtils.simpleBaseToBaseIndex(base);
|
||||
}
|
||||
return bitSetFrom(baseTen + preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.clipping;
|
|||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
|
@ -135,10 +136,9 @@ public class ReadClipper {
|
|||
ops.clear();
|
||||
if ( clippedRead.isEmpty() )
|
||||
return GATKSAMRecord.emptyRead(clippedRead);
|
||||
// return new GATKSAMRecord( clippedRead.getHeader() );
|
||||
return clippedRead;
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new RuntimeException(e); // this should never happen
|
||||
throw new RuntimeException(e); // this should never happen
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -188,7 +188,6 @@ public class ReadClipper {
|
|||
private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
|
||||
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
|
||||
return GATKSAMRecord.emptyRead(read);
|
||||
// return new GATKSAMRecord(read.getHeader());
|
||||
|
||||
this.addOp(new ClippingOp(start, stop));
|
||||
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
|
||||
|
|
@ -213,14 +212,12 @@ public class ReadClipper {
|
|||
private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
|
||||
if (read.isEmpty() || left == right)
|
||||
return GATKSAMRecord.emptyRead(read);
|
||||
// return new GATKSAMRecord(read.getHeader());
|
||||
GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
|
||||
|
||||
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
|
||||
// make the left cut index no longer part of the read. In that case, clip the read entirely.
|
||||
if (left > leftTailRead.getAlignmentEnd())
|
||||
return GATKSAMRecord.emptyRead(read);
|
||||
// return new GATKSAMRecord(read.getHeader());
|
||||
|
||||
ReadClipper clipper = new ReadClipper(leftTailRead);
|
||||
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
|
||||
|
|
@ -402,17 +399,77 @@ public class ReadClipper {
|
|||
|
||||
/**
|
||||
* Turns soft clipped bases into matches
|
||||
*
|
||||
* @return a new read with every soft clip turned into a match
|
||||
*/
|
||||
private GATKSAMRecord revertSoftClippedBases() {
|
||||
this.addOp(new ClippingOp(0, 0)); // UNSOFTCLIP_BASES doesn't need coordinates
|
||||
if (read.isEmpty())
|
||||
return read;
|
||||
|
||||
this.addOp(new ClippingOp(0, 0));
|
||||
return this.clipRead(ClippingRepresentation.REVERT_SOFTCLIPPED_BASES);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverts ALL soft-clipped bases
|
||||
*
|
||||
* @param read the read
|
||||
* @return the read with all soft-clipped bases turned into matches
|
||||
*/
|
||||
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read) {
|
||||
return (new ReadClipper(read)).revertSoftClippedBases();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverts only soft clipped bases with quality score greater than or equal to minQual
|
||||
*
|
||||
* Note: Will write a temporary field with the number of soft clips that were undone on each side (left: 'SL', right: 'SR')
|
||||
*
|
||||
* @param read the read
|
||||
* @param minQual the mininum base quality score to revert the base (inclusive)
|
||||
* @return the read with high quality soft clips reverted
|
||||
*/
|
||||
public static GATKSAMRecord revertSoftClippedBases(GATKSAMRecord read, byte minQual) {
|
||||
int nLeadingSoftClips = read.getAlignmentStart() - read.getSoftStart();
|
||||
if (read.isEmpty() || nLeadingSoftClips > read.getReadLength())
|
||||
return GATKSAMRecord.emptyRead(read);
|
||||
|
||||
byte [] quals = read.getBaseQualities(EventType.BASE_SUBSTITUTION);
|
||||
int left = -1;
|
||||
|
||||
if (nLeadingSoftClips > 0) {
|
||||
for (int i = nLeadingSoftClips - 1; i >= 0; i--) {
|
||||
if (quals[i] >= minQual)
|
||||
left = i;
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int right = -1;
|
||||
int nTailingSoftClips = read.getSoftEnd() - read.getAlignmentEnd();
|
||||
if (nTailingSoftClips > 0) {
|
||||
for (int i = read.getReadLength() - nTailingSoftClips; i < read.getReadLength() ; i++) {
|
||||
if (quals[i] >= minQual)
|
||||
right = i;
|
||||
else
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
GATKSAMRecord clippedRead = read;
|
||||
if (right >= 0) {
|
||||
if (right + 1 < clippedRead.getReadLength())
|
||||
clippedRead = hardClipByReadCoordinates(clippedRead, right+1, clippedRead.getReadLength()-1); // first we hard clip the low quality soft clips on the left tail
|
||||
clippedRead.setTemporaryAttribute("SR", nTailingSoftClips - (read.getReadLength() - right - 1)); // keep track of how may bases to 're-softclip' after processing
|
||||
}
|
||||
if (left >= 0) {
|
||||
if (left - 1 > 0)
|
||||
clippedRead = hardClipByReadCoordinates(clippedRead, 0, left-1); // then we hard clip the low quality soft clips on the right tail
|
||||
clippedRead.setTemporaryAttribute("SL", nLeadingSoftClips - left); // keep track of how may bases to 're-softclip' after processing
|
||||
}
|
||||
return revertSoftClippedBases(clippedRead); // now that we have only good bases in the soft clips, we can revert them all
|
||||
}
|
||||
|
||||
/**
|
||||
* Generic functionality to hard clip a read, used internally by hardClipByReferenceCoordinatesLeftTail
|
||||
* and hardClipByReferenceCoordinatesRightTail. Should not be used directly.
|
||||
|
|
@ -446,9 +503,6 @@ public class ReadClipper {
|
|||
stop = read.getReadLength() - 1;
|
||||
}
|
||||
|
||||
// if ((start == 0 && stop == read.getReadLength() - 1))
|
||||
// return GATKSAMRecord.emptyRead(read);
|
||||
|
||||
if (start < 0 || stop > read.getReadLength() - 1)
|
||||
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");
|
||||
|
||||
|
|
|
|||
|
|
@ -88,19 +88,18 @@ public class BaseRecalibration {
|
|||
public void recalibrateRead(final GATKSAMRecord read) {
|
||||
final ReadCovariates readCovariates = RecalDataManager.computeCovariates(read, requestedCovariates); // compute all covariates for the read
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
final byte[] originalQuals = read.getBaseQualities(errorModel);
|
||||
final byte[] recalQuals = originalQuals.clone();
|
||||
final byte[] quals = read.getBaseQualities(errorModel);
|
||||
|
||||
for (int offset = 0; offset < read.getReadLength(); offset++) { // recalibrate all bases in the read
|
||||
byte qualityScore = originalQuals[offset];
|
||||
final byte originalQualityScore = quals[offset];
|
||||
|
||||
if (qualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
if (originalQualityScore >= QualityUtils.MIN_USABLE_Q_SCORE) { // only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
final BitSet[] keySet = readCovariates.getKeySet(offset, errorModel); // get the keyset for this base using the error model
|
||||
qualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
final byte recalibratedQualityScore = performSequentialQualityCalculation(keySet, errorModel); // recalibrate the base
|
||||
quals[offset] = recalibratedQualityScore;
|
||||
}
|
||||
recalQuals[offset] = qualityScore;
|
||||
}
|
||||
read.setBaseQualities(recalQuals, errorModel);
|
||||
read.setBaseQualities(quals, errorModel);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -428,8 +428,8 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
* Use this method if you want to create a new empty GATKSAMRecord based on
|
||||
* another GATKSAMRecord
|
||||
*
|
||||
* @param read
|
||||
* @return
|
||||
* @param read a read to copy the header from
|
||||
* @return a read with no bases but safe for the GATK
|
||||
*/
|
||||
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
|
||||
GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),
|
||||
|
|
|
|||
|
|
@ -0,0 +1,48 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
||||
public class ReadUnclippedStartWithNoTiesComparator implements Comparator<SAMRecord> {
|
||||
@Requires("c1 >= 0 && c2 >= 0")
|
||||
@Ensures("result == 0 || result == 1 || result == -1")
|
||||
private int compareContigs(int c1, int c2) {
|
||||
if (c1 == c2)
|
||||
return 0;
|
||||
else if (c1 > c2)
|
||||
return 1;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Requires("r1 != null && r2 != null")
|
||||
@Ensures("result == 0 || result == 1 || result == -1")
|
||||
public int compare(SAMRecord r1, SAMRecord r2) {
|
||||
int result;
|
||||
|
||||
if (r1 == r2)
|
||||
result = 0;
|
||||
|
||||
else if (r1.getReadUnmappedFlag())
|
||||
result = 1;
|
||||
else if (r2.getReadUnmappedFlag())
|
||||
result = -1;
|
||||
else {
|
||||
final int cmpContig = compareContigs(r1.getReferenceIndex(), r2.getReferenceIndex());
|
||||
|
||||
if (cmpContig != 0)
|
||||
result = cmpContig;
|
||||
|
||||
else {
|
||||
if (r1.getUnclippedStart() < r2.getUnclippedStart())
|
||||
result = -1;
|
||||
else
|
||||
result = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
@ -47,6 +47,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -T VariantRecalibrator" +
|
||||
" -input " + params.inVCF +
|
||||
" -L 20:1,000,000-40,000,000" +
|
||||
" --no_cmdline_in_header" +
|
||||
" -an QD -an HaplotypeScore -an HRun" +
|
||||
" -percentBad 0.07" +
|
||||
" --minNumBadVariants 0" +
|
||||
|
|
@ -92,6 +93,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
" -T VariantRecalibrator" +
|
||||
" -input " + params.inVCF +
|
||||
" -L 20:1,000,000-40,000,000" +
|
||||
" --no_cmdline_in_header" +
|
||||
" -an QD -an ReadPosRankSum -an HaplotypeScore" +
|
||||
" -percentBad 0.08" +
|
||||
" -mode INDEL -mG 3" +
|
||||
|
|
|
|||
Loading…
Reference in New Issue