Reduced reads are now aware of their original alignments
* Added annotations for reads that had been soft clipped prior to being reduced so that we can later recover their original alignments (start and end). * Tags keep the alignment shifts, not the real alignment, for better compression * Tags are defined in the GATKSAMRecord * GATKSAMRecord has new functionality to retrieve the original alignment start and end of all reads (trimmed or not) -- getOriginalAlignmentStart() and getOriginalAlignmentEnd() * Updated ReduceReads MD5s accordingly
This commit is contained in:
parent
07f72516ae
commit
0d4027104f
|
|
@ -43,7 +43,10 @@ import java.util.Map;
|
|||
*
|
||||
*/
|
||||
public class GATKSAMRecord extends BAMRecord {
|
||||
public static final String REDUCED_READ_CONSENSUS_TAG = "RR";
|
||||
// ReduceReads specific attribute tags
|
||||
public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool
|
||||
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OS"; // reads that are clipped may use this attribute to keep track of their original alignment start
|
||||
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end
|
||||
|
||||
// the SAMRecord data we're caching
|
||||
private String mReadString = null;
|
||||
|
|
@ -321,6 +324,36 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the original alignment start of a previously clipped read.
|
||||
*
|
||||
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
|
||||
*
|
||||
* @return the alignment start of a read before it was clipped
|
||||
*/
|
||||
public int getOriginalAlignmentStart() {
|
||||
int originalAlignmentStart = getUnclippedStart();
|
||||
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT);
|
||||
if (alignmentShift != null)
|
||||
originalAlignmentStart += alignmentShift;
|
||||
return originalAlignmentStart;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines the original alignment end of a previously clipped read.
|
||||
*
|
||||
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
|
||||
*
|
||||
* @return the alignment end of a read before it was clipped
|
||||
*/
|
||||
public int getOriginalAlignmentEnd() {
|
||||
int originalAlignmentEnd = getUnclippedEnd();
|
||||
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT);
|
||||
if (alignmentShift != null)
|
||||
originalAlignmentEnd -= alignmentShift;
|
||||
return originalAlignmentEnd;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an empty GATKSAMRecord with the read's header, read group and mate
|
||||
* information, but empty (not-null) fields:
|
||||
|
|
@ -363,4 +396,21 @@ public class GATKSAMRecord extends BAMRecord {
|
|||
return emptyRead;
|
||||
}
|
||||
|
||||
/**
|
||||
* Shallow copy of everything, except for the attribute list and the temporary attributes.
|
||||
* A new list of the attributes is created for both, but the attributes themselves are copied by reference.
|
||||
* This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway.
|
||||
*
|
||||
* @return a shallow copy of the GATKSAMRecord
|
||||
* @throws CloneNotSupportedException
|
||||
*/
|
||||
@Override
|
||||
public Object clone() throws CloneNotSupportedException {
|
||||
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
|
||||
if (temporaryAttributes != null) {
|
||||
for (Object attribute : temporaryAttributes.keySet())
|
||||
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
|
||||
}
|
||||
return clone;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue