GATKSAMRecord emptyRead static constructor

* Creates an empty GATKSAMRecord with empty (not null) Cigar, bases and quals. Allows empty reads to be probed without breaking.
 * All ReadClipper utilities now emit empty reads for fully clipped reads
This commit is contained in:
Mauricio Carneiro 2011-12-27 17:00:47 -05:00
parent 8259c748f2
commit f692911903
3 changed files with 55 additions and 7 deletions

View File

@ -286,7 +286,9 @@ public class ClippingOp {
@Requires({"start <= stop", "start == 0 || stop == read.getReadLength() - 1"})
private GATKSAMRecord hardClip (GATKSAMRecord read, int start, int stop) {
if (start == 0 && stop == read.getReadLength() - 1)
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
// If the read is unmapped there is no Cigar string and neither should we create a new cigar string
CigarShift cigarShift = (read.getReadUnmappedFlag()) ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(read.getCigar(), start, stop);

View File

@ -134,7 +134,8 @@ public class ReadClipper {
wasClipped = true;
ops.clear();
if ( clippedRead.isEmpty() )
return new GATKSAMRecord( clippedRead.getHeader() );
return GATKSAMRecord.emptyRead(clippedRead);
// return new GATKSAMRecord( clippedRead.getHeader() );
return clippedRead;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e); // this should never happen
@ -186,7 +187,8 @@ public class ReadClipper {
"start == 0 || stop == read.getReadLength() - 1"}) // cannot clip the middle of the read
private GATKSAMRecord hardClipByReadCoordinates(int start, int stop) {
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
this.addOp(new ClippingOp(start, stop));
return clipRead(ClippingRepresentation.HARDCLIP_BASES);
@ -210,13 +212,15 @@ public class ReadClipper {
"right <= read.getAlignmentEnd()"}) // coordinate has to be within the mapped read
private GATKSAMRecord hardClipBothEndsByReferenceCoordinates(int left, int right) {
if (read.isEmpty() || left == right)
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
GATKSAMRecord leftTailRead = hardClipByReferenceCoordinates(right, -1);
// after clipping one tail, it is possible that the consequent hard clipping of adjacent deletions
// make the left cut index no longer part of the read. In that case, clip the read entirely.
if (left > leftTailRead.getAlignmentEnd())
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
ReadClipper clipper = new ReadClipper(leftTailRead);
return clipper.hardClipByReferenceCoordinatesLeftTail(left);
@ -249,7 +253,8 @@ public class ReadClipper {
// if the entire read should be clipped, then return an empty read.
if (leftClipIndex > rightClipIndex)
return (new GATKSAMRecord(read.getHeader()));
return GATKSAMRecord.emptyRead(read);
// return (new GATKSAMRecord(read.getHeader()));
if (rightClipIndex < read.getReadLength() - 1) {
this.addOp(new ClippingOp(rightClipIndex + 1, read.getReadLength() - 1));
@ -379,7 +384,8 @@ public class ReadClipper {
int stop = (refStop < 0) ? read.getReadLength() - 1 : ReadUtils.getReadCoordinateForReferenceCoordinate(read, refStop, ReadUtils.ClippingTail.LEFT_TAIL);
if (read.isEmpty() || (start == 0 && stop == read.getReadLength() - 1))
return new GATKSAMRecord(read.getHeader());
return GATKSAMRecord.emptyRead(read);
// return new GATKSAMRecord(read.getHeader());
if (start < 0 || stop > read.getReadLength() - 1)
throw new ReviewedStingException("Trying to clip before the start or after the end of a read");

View File

@ -316,6 +316,46 @@ public class GATKSAMRecord extends BAMRecord {
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
/**
* Creates an empty GATKSAMRecord with the read's header, read group and mate
* information, but empty (not-null) fields:
* - Cigar String
* - Read Bases
* - Base Qualities
*
* Use this method if you want to create a new empty GATKSAMRecord based on
* another GATKSAMRecord
*
* @param read
* @return
*/
public static GATKSAMRecord emptyRead(GATKSAMRecord read) {
GATKSAMRecord emptyRead = new GATKSAMRecord(read.getHeader(),
read.getReferenceIndex(),
0,
(short) 0,
(short) 0,
0,
0,
read.getFlags(),
0,
read.getMateReferenceIndex(),
read.getMateAlignmentStart(),
read.getInferredInsertSize(),
null);
emptyRead.setCigarString("");
emptyRead.setReadBases(new byte[0]);
emptyRead.setBaseQualities(new byte[0]);
SAMReadGroupRecord samRG = read.getReadGroup();
emptyRead.clearAttributes();
if (samRG != null) {
GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord(samRG);
emptyRead.setReadGroup(rg);
}
return emptyRead;
}
}