ReadUtils cleanup

* Removed all clipping functionality from ReadUtils (it should all be done using the ReadClipper now)
 * Cleaned up functionality that wasn't being used or had been superseded by other code (in an effort to reduce multiple unsupported implementations)
 * Made all meaningful functions public and added better comments/explanation to the headers
This commit is contained in:
Mauricio Carneiro 2011-12-20 14:35:33 -05:00
parent 1c4774c475
commit 07128a2ad2
6 changed files with 112 additions and 376 deletions

View File

@ -299,9 +299,8 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/ */
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) { public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) { if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) { if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES )
read = ReadUtils.replaceSoftClipsWithMatches(read); read = (new ReadClipper(read)).revertSoftClippedBases();
}
ReadClipperWithData clipper = new ReadClipperWithData(read, sequencesToClip); ReadClipperWithData clipper = new ReadClipperWithData(read, sequencesToClip);
// //

View File

@ -81,7 +81,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) { for ( SAMReadGroupRecord readGroup : this.getToolkit().getSAMFileHeader().getReadGroups()) {
final String sample = readGroup.getSample(); final String sample = readGroup.getSample();
if ( ! headers.containsKey(sample) ) { if ( ! headers.containsKey(sample) ) {
SAMFileHeader header = ReadUtils.copySAMFileHeader(this.getToolkit().getSAMFileHeader()); SAMFileHeader header = duplicateSAMFileHeader(this.getToolkit().getSAMFileHeader());
logger.debug(String.format("Creating BAM header for sample %s", sample)); logger.debug(String.format("Creating BAM header for sample %s", sample));
ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>(); ArrayList<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
header.setReadGroups(readGroups); header.setReadGroups(readGroups);
@ -121,4 +121,20 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
return outputs; return outputs;
} }
/**
 * Makes an independent copy of a SAM file header, carrying over the sort order,
 * group order, program records, read groups, sequence dictionary and every
 * header attribute of the original.
 *
 * @param toCopy the header to duplicate
 * @return a new SAMFileHeader with the same metadata as toCopy
 */
public static SAMFileHeader duplicateSAMFileHeader(SAMFileHeader toCopy) {
    final SAMFileHeader result = new SAMFileHeader();
    result.setSortOrder(toCopy.getSortOrder());
    result.setGroupOrder(toCopy.getGroupOrder());
    result.setProgramRecords(toCopy.getProgramRecords());
    result.setReadGroups(toCopy.getReadGroups());
    result.setSequenceDictionary(toCopy.getSequenceDictionary());
    // copy the free-form header attributes one by one
    for (final Map.Entry<String, String> attribute : toCopy.getAttributes())
        result.setAttribute(attribute.getKey(), attribute.getValue());
    return result;
}
} }

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
@ -125,7 +126,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) { for ( ExtendedEventPileupElement p : indelPileup.toExtendedIterable() ) {
//SAMRecord read = p.getRead(); //SAMRecord read = p.getRead();
GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead()); GATKSAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null) if (read == null)
continue; continue;
if(ReadUtils.is454Read(read)) { if(ReadUtils.is454Read(read)) {

View File

@ -463,7 +463,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
private void emitReadLists() { private void emitReadLists() {
// pre-merge lists to sort them in preparation for constrained SAMFileWriter // pre-merge lists to sort them in preparation for constrained SAMFileWriter
readsNotToClean.addAll(readsToClean.getReads()); readsNotToClean.addAll(readsToClean.getReads());
ReadUtils.coordinateSortReads(readsNotToClean); ReadUtils.sortReadsByCoordinate(readsNotToClean);
manager.addReads(readsNotToClean, readsActuallyCleaned); manager.addReads(readsNotToClean, readsActuallyCleaned);
readsToClean.clear(); readsToClean.clear();
readsNotToClean.clear(); readsNotToClean.clear();

View File

@ -32,6 +32,7 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
@ -409,7 +410,7 @@ public class PairHMMIndelErrorModel {
} }
else { else {
//System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName()); //System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
SAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead()); SAMRecord read = ReadClipper.hardClipAdaptorSequence(p.getRead());
if (read == null) if (read == null)
continue; continue;

View File

@ -47,11 +47,34 @@ public class ReadUtils {
private static int DEFAULT_ADAPTOR_SIZE = 100; private static int DEFAULT_ADAPTOR_SIZE = 100;
/**
* A marker to tell which end of the read has been clipped
*/
public enum ClippingTail { public enum ClippingTail {
LEFT_TAIL, LEFT_TAIL,
RIGHT_TAIL RIGHT_TAIL
} }
/**
* A HashMap of the SAM spec read flag names
*
* Note: This is not being used right now, but can be useful in the future
*/
private static final Map<Integer, String> readFlagNames = new HashMap<Integer, String>();
// Populate the flag-bit -> name table once at class load time.
// Bit 0x20 (mate strand) is intentionally left commented out below.
static {
readFlagNames.put(0x1, "Paired");
readFlagNames.put(0x2, "Proper");
readFlagNames.put(0x4, "Unmapped");
readFlagNames.put(0x8, "MateUnmapped");
readFlagNames.put(0x10, "Forward");
//readFlagNames.put(0x20, "MateForward");
readFlagNames.put(0x40, "FirstOfPair");
readFlagNames.put(0x80, "SecondOfPair");
readFlagNames.put(0x100, "NotPrimary");
readFlagNames.put(0x200, "NON-PF");
readFlagNames.put(0x400, "Duplicate");
}
/** /**
* This enum represents all the different ways in which a read can overlap an interval. * This enum represents all the different ways in which a read can overlap an interval.
* *
@ -96,38 +119,22 @@ public class ReadUtils {
*/ */
public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED} public enum ReadAndIntervalOverlap {NO_OVERLAP_CONTIG, NO_OVERLAP_LEFT, NO_OVERLAP_RIGHT, NO_OVERLAP_HARDCLIPPED_LEFT, NO_OVERLAP_HARDCLIPPED_RIGHT, OVERLAP_LEFT, OVERLAP_RIGHT, OVERLAP_LEFT_AND_RIGHT, OVERLAP_CONTAINED}
public static SAMFileHeader copySAMFileHeader(SAMFileHeader toCopy) { /**
SAMFileHeader copy = new SAMFileHeader(); * Creates a SAMFileWriter with the given compression level if you request a bam file. Creates a regular
* SAMFileWriter without compression otherwise.
copy.setSortOrder(toCopy.getSortOrder()); *
copy.setGroupOrder(toCopy.getGroupOrder()); * @param header
copy.setProgramRecords(toCopy.getProgramRecords()); * @param presorted
copy.setReadGroups(toCopy.getReadGroups()); * @param file
copy.setSequenceDictionary(toCopy.getSequenceDictionary()); * @param compression
* @return a SAMFileWriter with the compression level if it is a bam.
for (Map.Entry<String, String> e : toCopy.getAttributes()) */
copy.setAttribute(e.getKey(), e.getValue());
return copy;
}
public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) { public static SAMFileWriter createSAMFileWriterWithCompression(SAMFileHeader header, boolean presorted, String file, int compression) {
if (file.endsWith(".bam")) if (file.endsWith(".bam"))
return new SAMFileWriterFactory().makeBAMWriter(header, presorted, new File(file), compression); return new SAMFileWriterFactory().makeBAMWriter(header, presorted, new File(file), compression);
return new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, new File(file)); return new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, new File(file));
} }
public static boolean isPlatformRead(SAMRecord read, String name) {
SAMReadGroupRecord readGroup = read.getReadGroup();
if (readGroup != null) {
Object readPlatformAttr = readGroup.getAttribute("PL");
if (readPlatformAttr != null)
return readPlatformAttr.toString().toUpperCase().contains(name);
}
return false;
}
/** /**
* is this base inside the adaptor of the read? * is this base inside the adaptor of the read?
* *
@ -175,7 +182,7 @@ public class ReadUtils {
* @param read the read being tested for the adaptor boundary * @param read the read being tested for the adaptor boundary
* @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read. NULL if the read is unmapped or the insert size cannot be determined (and is necessary for the calculation). * @return the reference coordinate for the adaptor boundary (effectively the first base IN the adaptor, closest to the read. NULL if the read is unmapped or the insert size cannot be determined (and is necessary for the calculation).
*/ */
protected static Integer getAdaptorBoundary(final SAMRecord read) { public static Integer getAdaptorBoundary(final SAMRecord read) {
if ( read.getReadUnmappedFlag() ) if ( read.getReadUnmappedFlag() )
return null; // don't worry about unmapped pairs return null; // don't worry about unmapped pairs
@ -192,363 +199,54 @@ public class ReadUtils {
return adaptorBoundary; return adaptorBoundary;
} }
/**
 * Hard clips the start of a read, removing all bases whose reference position is
 * at or before stopPosition. Works on a clone; the input record is not modified.
 *
 * @param oldRec the read to clip
 * @param stopPosition reference coordinate of the last base to clip away
 * @return a clipped clone of the read, or null if the whole read would be clipped
 *         (or if cloning fails)
 */
private static GATKSAMRecord hardClipStartOfRead(GATKSAMRecord oldRec, int stopPosition) {
if ( stopPosition >= oldRec.getAlignmentEnd() ) {
// BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it
//System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName());
return null;
}
GATKSAMRecord read;
try {
read = (GATKSAMRecord)oldRec.clone();
} catch (Exception e) {
// cloning failed -- signal the caller by returning null rather than clipping in place
return null;
}
//System.out.printf("Clipping start of read: %50s start=%d adaptorEnd=%d isize=%d %n",
// read.getReadName(), read.getAlignmentStart(), stopPosition, read.getInferredInsertSize());
Cigar oldCigar = read.getCigar();
LinkedList<CigarElement> newCigarElements = new LinkedList<CigarElement>();
// walk the cigar tracking the current reference position, counting how many
// read bases must be removed (basesToClip) and how many were already hard clipped
int currentPos = read.getAlignmentStart();
int basesToClip = 0;
int basesAlreadyClipped = 0;
for ( CigarElement ce : oldCigar.getCigarElements() ) {
// once past the clip point, keep the remaining elements unchanged
if ( currentPos > stopPosition) {
newCigarElements.add(ce);
continue;
}
int elementLength = ce.getLength();
switch ( ce.getOperator() ) {
case M:
// an M element may straddle the clip point: clip base by base and keep the remainder
for (int i = 0; i < elementLength; i++, currentPos++, basesToClip++) {
if ( currentPos > stopPosition ) {
newCigarElements.add(new CigarElement(elementLength - i, CigarOperator.M));
break;
}
}
break;
case I:
case S:
// insertions/soft clips consume read bases but not reference positions
basesToClip += elementLength;
break;
case D:
case N:
// deletions/skips consume reference positions but no read bases
currentPos += elementLength;
break;
case H:
basesAlreadyClipped += elementLength;
// NOTE(review): intentional fall-through into P, which is a no-op
case P:
break;
default: throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported");
}
}
// copy over the unclipped bases
final byte[] bases = read.getReadBases();
final byte[] quals = read.getBaseQualities();
int newLength = bases.length - basesToClip;
byte[] newBases = new byte[newLength];
byte[] newQuals = new byte[newLength];
System.arraycopy(bases, basesToClip, newBases, 0, newLength);
System.arraycopy(quals, basesToClip, newQuals, 0, newLength);
read.setReadBases(newBases);
read.setBaseQualities(newQuals);
// now add a CIGAR element for the clipped bases
newCigarElements.addFirst(new CigarElement(basesToClip + basesAlreadyClipped, CigarOperator.H));
Cigar newCigar = new Cigar(newCigarElements);
read.setCigar(newCigar);
// adjust the start accordingly
read.setAlignmentStart(stopPosition + 1);
return read;
}
/**
 * Hard clips the end of a read, removing all bases whose reference position is
 * at or after startPosition. Works on a clone; the input record is not modified.
 *
 * @param oldRec the read to clip
 * @param startPosition reference coordinate of the first base to clip away
 * @return a clipped clone of the read, or null if the whole read would be clipped
 *         (or if cloning fails)
 */
private static GATKSAMRecord hardClipEndOfRead(GATKSAMRecord oldRec, int startPosition) {
if ( startPosition <= oldRec.getAlignmentStart() ) {
// BAM representation issue -- we can't clip away all bases in a read, just leave it alone and let the filter deal with it
//System.out.printf("Entire read needs to be clipped: %50s %n", read.getReadName());
return null;
}
GATKSAMRecord read;
try {
read = (GATKSAMRecord)oldRec.clone();
} catch (Exception e) {
// cloning failed -- signal the caller by returning null rather than clipping in place
return null;
}
//System.out.printf("Clipping end of read: %50s adaptorStart=%d end=%d isize=%d %n",
// read.getReadName(), startPosition, read.getAlignmentEnd(), read.getInferredInsertSize());
Cigar oldCigar = read.getCigar();
LinkedList<CigarElement> newCigarElements = new LinkedList<CigarElement>();
// walk the cigar tracking the current reference position, counting the read bases
// to keep and any trailing hard clips already present past the clip point
int currentPos = read.getAlignmentStart();
int basesToKeep = 0;
int basesAlreadyClipped = 0;
for ( CigarElement ce : oldCigar.getCigarElements() ) {
int elementLength = ce.getLength();
// once past the clip point, only account for pre-existing hard clips; drop everything else
if ( currentPos >= startPosition ) {
if ( ce.getOperator() == CigarOperator.H )
basesAlreadyClipped += elementLength;
continue;
}
switch ( ce.getOperator() ) {
case M:
// an M element may straddle the clip point: keep bases up to (exclusive of) startPosition
for (int i = 0; i < elementLength; i++, currentPos++, basesToKeep++) {
if ( currentPos == startPosition ) {
newCigarElements.add(new CigarElement(i, CigarOperator.M));
break;
}
}
// element ended before the clip point -- keep it whole
if ( currentPos != startPosition )
newCigarElements.add(ce);
break;
case I:
case S:
// insertions/soft clips consume read bases but not reference positions
newCigarElements.add(ce);
basesToKeep += elementLength;
break;
case D:
case N:
// deletions/skips consume reference positions but no read bases
newCigarElements.add(ce);
currentPos += elementLength;
break;
case H:
case P:
newCigarElements.add(ce);
break;
default: throw new ReviewedStingException("The " + ce.getOperator() + " cigar element is not currently supported");
}
}
// copy over the unclipped bases
final byte[] bases = read.getReadBases();
final byte[] quals = read.getBaseQualities();
byte[] newBases = new byte[basesToKeep];
byte[] newQuals = new byte[basesToKeep];
System.arraycopy(bases, 0, newBases, 0, basesToKeep);
System.arraycopy(quals, 0, newQuals, 0, basesToKeep);
read.setReadBases(newBases);
read.setBaseQualities(newQuals);
// now add a CIGAR element for the clipped bases
newCigarElements.add(new CigarElement((bases.length - basesToKeep) + basesAlreadyClipped, CigarOperator.H));
Cigar newCigar = new Cigar(newCigarElements);
read.setCigar(newCigar);
// adjust the stop accordingly
// read.setAlignmentEnd(startPosition - 1);
return read;
}
/** /**
* Hard clips away (i.e., removes from the read) bases that were previously soft clipped. * is the read a 454 read ?
* *
* @param read * @param read the read to test
* @return * @return checks the read group tag PL for the default 454 tag
*/ */
/**
 * Hard clips away (removes from the read) any bases that were previously soft clipped,
 * converting leading/trailing S cigar elements to H and merging adjacent elements of
 * the same operator that result (e.g. 5H20S becomes 25H).
 *
 * @param read the read whose soft clips should be hardened
 * @return the read with soft clips turned into hard clips
 */
@Requires("read != null")
@Ensures("result != null")
public static GATKSAMRecord hardClipSoftClippedBases(GATKSAMRecord read) {
List<CigarElement> cigarElts = read.getCigar().getCigarElements();
if ( cigarElts.size() == 1 ) // can't be soft clipped, just return
return read;
// keepStart/keepEnd are 0-based offsets of the first/last read base to retain
int keepStart = 0, keepEnd = read.getReadLength() - 1;
List<CigarElement> newCigarElements = new LinkedList<CigarElement>();
for ( int i = 0; i < cigarElts.size(); i++ ) {
CigarElement ce = cigarElts.get(i);
int l = ce.getLength();
switch ( ce.getOperator() ) {
case S:
// a leading soft clip trims the front; any other S trims the back
// NOTE(review): assumes S only occurs at the read ends (as per the SAM spec)
if ( i == 0 )
keepStart = l;
else
keepEnd = read.getReadLength() - l - 1;
newCigarElements.add(new CigarElement(l, CigarOperator.HARD_CLIP));
break;
default:
newCigarElements.add(ce);
break;
}
}
// Merges tandem cigar elements like 5H10H or 2S5S to 15H or 7S
// this will happen if you soft clip a read that has been hard clipped before
// like: 5H20S => 5H20H
List<CigarElement> mergedCigarElements = new LinkedList<CigarElement>();
Iterator<CigarElement> cigarElementIterator = newCigarElements.iterator();
CigarOperator currentOperator = null;
int currentOperatorLength = 0;
while (cigarElementIterator.hasNext()) {
CigarElement cigarElement = cigarElementIterator.next();
if (currentOperator != cigarElement.getOperator()) {
// flush the previous run (skipped on the very first element)
if (currentOperator != null)
mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator));
currentOperator = cigarElement.getOperator();
currentOperatorLength = cigarElement.getLength();
}
else
currentOperatorLength += cigarElement.getLength();
}
// flush the final run
mergedCigarElements.add(new CigarElement(currentOperatorLength, currentOperator));
return hardClipBases(read, keepStart, keepEnd, mergedCigarElements);
}
}
/**
 * Hard clips out the bases in read, keeping the bases from keepStart to keepEnd, inclusive. Note these
 * are offsets, so they are 0 based
 *
 * If the kept range spans the whole read, the read is returned unchanged (and un-cloned);
 * otherwise a clone is modified and returned.
 *
 * @param read the read to clip
 * @param keepStart 0-based offset of the first base to keep
 * @param keepEnd 0-based offset of the last base to keep (inclusive)
 * @param newCigarElements the cigar to install on the clipped read (ignored for unmapped reads)
 * @return the clipped read (a clone), or the original read if nothing needed clipping
 */
@Requires({
"read != null",
"keepStart >= 0",
"keepEnd < read.getReadLength()",
"read.getReadUnmappedFlag() || newCigarElements != null"})
@Ensures("result != null")
public static GATKSAMRecord hardClipBases(GATKSAMRecord read, int keepStart, int keepEnd, List<CigarElement> newCigarElements) {
int newLength = keepEnd - keepStart + 1;
if ( newLength != read.getReadLength() ) {
try {
read = (GATKSAMRecord)read.clone();
// copy over the unclipped bases
final byte[] bases = read.getReadBases();
final byte[] quals = read.getBaseQualities();
byte[] newBases = new byte[newLength];
byte[] newQuals = new byte[newLength];
System.arraycopy(bases, keepStart, newBases, 0, newLength);
System.arraycopy(quals, keepStart, newQuals, 0, newLength);
read.setReadBases(newBases);
read.setBaseQualities(newQuals);
// now add a CIGAR element for the clipped bases, if the read isn't unmapped
if ( ! read.getReadUnmappedFlag() ) {
Cigar newCigar = new Cigar(newCigarElements);
read.setCigar(newCigar);
}
} catch ( CloneNotSupportedException e ) {
throw new ReviewedStingException("WTF, where did clone go?", e);
}
}
return read;
}
/**
 * Replaces every soft-clip (S) cigar element of a read with a match/mismatch (M)
 * element of the same length, then merges the resulting adjacent M elements at
 * either end of the cigar (e.g. 5S10M becomes 15M).
 *
 * Note: modifies the read's cigar in place and returns the same read instance.
 *
 * @param read the read whose soft clips should be re-declared as matches
 * @return the same read, with S operators replaced by M and adjacent M elements merged
 */
public static GATKSAMRecord replaceSoftClipsWithMatches(GATKSAMRecord read) {
List<CigarElement> newCigarElements = new ArrayList<CigarElement>();
for ( CigarElement ce : read.getCigar().getCigarElements() ) {
if ( ce.getOperator() == CigarOperator.SOFT_CLIP )
newCigarElements.add(new CigarElement(ce.getLength(), CigarOperator.MATCH_OR_MISMATCH));
else
newCigarElements.add(ce);
}
// merge the two leading elements when both are now M
if ( newCigarElements.size() > 1 ) {
CigarElement first = newCigarElements.get(0);
CigarElement second = newCigarElements.get(1);
if ( first.getOperator() == CigarOperator.MATCH_OR_MISMATCH && second.getOperator() == CigarOperator.MATCH_OR_MISMATCH ) {
newCigarElements.set(0, new CigarElement(first.getLength() + second.getLength(), CigarOperator.MATCH_OR_MISMATCH));
newCigarElements.remove(1);
}
}
// merge the two trailing elements when both are now M
if ( newCigarElements.size() > 1 ) {
CigarElement penult = newCigarElements.get(newCigarElements.size()-2);
CigarElement last = newCigarElements.get(newCigarElements.size()-1);
// BUGFIX: original code tested penult's operator twice instead of checking
// last, so a trailing M followed by a non-M element was merged incorrectly
if ( penult.getOperator() == CigarOperator.MATCH_OR_MISMATCH && last.getOperator() == CigarOperator.MATCH_OR_MISMATCH ) {
newCigarElements.set(newCigarElements.size()-2, new CigarElement(penult.getLength() + last.getLength(), CigarOperator.MATCH_OR_MISMATCH));
newCigarElements.remove(newCigarElements.size()-1);
}
}
read.setCigar(new Cigar(newCigarElements));
return read;
}
/**
 * Hard clips the adaptor sequence out of a read, when an adaptor boundary can be
 * determined and falls inside the read's alignment.
 *
 * @param read original SAM record
 * @return a new read with adaptor sequence hard-clipped out, the unchanged read when
 *         there is nothing to clip, or null if the read would be fully clipped
 */
public static GATKSAMRecord hardClipAdaptorSequence(final GATKSAMRecord read) {
final Integer boundary = getAdaptorBoundary(read);

// nothing to do when the boundary is unknown or lies outside the aligned read
if (boundary == null || !isInsideRead(read, boundary))
return read;

// negative-strand reads carry the adaptor at their start; positive-strand at their end
if (read.getReadNegativeStrandFlag())
return hardClipStartOfRead(read, boundary);
else
return hardClipEndOfRead(read, boundary);
}
public static boolean is454Read(SAMRecord read) { public static boolean is454Read(SAMRecord read) {
return isPlatformRead(read, "454"); return isPlatformRead(read, "454");
} }
/**
* is the read a SOLiD read ?
*
* @param read the read to test
* @return checks the read group tag PL for the default SOLiD tag
*/
public static boolean isSOLiDRead(SAMRecord read) { public static boolean isSOLiDRead(SAMRecord read) {
return isPlatformRead(read, "SOLID"); return isPlatformRead(read, "SOLID");
} }
/**
* is the read a SLX read ?
*
* @param read the read to test
* @return checks the read group tag PL for the default SLX tag
*/
public static boolean isSLXRead(SAMRecord read) { public static boolean isSLXRead(SAMRecord read) {
return isPlatformRead(read, "ILLUMINA"); return isPlatformRead(read, "ILLUMINA");
} }
private static final Map<Integer, String> readFlagNames /**
= new HashMap<Integer, String>(); * checks if the read has a platform tag in the readgroup equal to 'name' ?
*
static { * @param read the read to test
readFlagNames.put(0x1, "Paired"); * @param name the platform name to test
readFlagNames.put(0x2, "Proper"); * @return whether or not name == PL tag in the read group of read
readFlagNames.put(0x4, "Unmapped"); */
readFlagNames.put(0x8, "MateUnmapped"); public static boolean isPlatformRead(SAMRecord read, String name) {
readFlagNames.put(0x10, "Forward"); SAMReadGroupRecord readGroup = read.getReadGroup();
//readFlagNames.put(0x20, "MateForward"); if (readGroup != null) {
readFlagNames.put(0x40, "FirstOfPair"); Object readPlatformAttr = readGroup.getAttribute("PL");
readFlagNames.put(0x80, "SecondOfPair"); if (readPlatformAttr != null)
readFlagNames.put(0x100, "NotPrimary"); return readPlatformAttr.toString().toUpperCase().contains(name);
readFlagNames.put(0x200, "NON-PF");
readFlagNames.put(0x400, "Duplicate");
}
public static String readFlagsAsString(GATKSAMRecord read) {
String flags = "";
for (int flag : readFlagNames.keySet()) {
if ((read.getFlags() & flag) != 0) {
flags += readFlagNames.get(flag) + " ";
}
} }
return flags; return false;
} }
/** /**
* Returns the collections of reads sorted in coordinate order, according to the order defined * Returns the collections of reads sorted in coordinate order, according to the order defined
* in the reads themselves * in the reads themselves
@ -556,12 +254,19 @@ public class ReadUtils {
* @param reads * @param reads
* @return * @return
*/ */
public final static List<GATKSAMRecord> coordinateSortReads(List<GATKSAMRecord> reads) { public final static List<GATKSAMRecord> sortReadsByCoordinate(List<GATKSAMRecord> reads) {
final SAMRecordComparator comparer = new SAMRecordCoordinateComparator(); final SAMRecordComparator comparer = new SAMRecordCoordinateComparator();
Collections.sort(reads, comparer); Collections.sort(reads, comparer);
return reads; return reads;
} }
/**
* If a read starts in INSERTION, returns the first element length.
*
* Warning: If the read has Hard or Soft clips before the insertion this function will return 0.
* @param read
* @return the length of the first insertion, or 0 if there is none (see warning).
*/
public final static int getFirstInsertionOffset(SAMRecord read) { public final static int getFirstInsertionOffset(SAMRecord read) {
CigarElement e = read.getCigar().getCigarElement(0); CigarElement e = read.getCigar().getCigarElement(0);
if ( e.getOperator() == CigarOperator.I ) if ( e.getOperator() == CigarOperator.I )
@ -570,8 +275,15 @@ public class ReadUtils {
return 0; return 0;
} }
/**
* If a read ends in INSERTION, returns the last element length.
*
* Warning: If the read has Hard or Soft clips after the insertion this function will return 0.
* @param read
* @return the length of the last insertion, or 0 if there is none (see warning).
*/
public final static int getLastInsertionOffset(SAMRecord read) { public final static int getLastInsertionOffset(SAMRecord read) {
CigarElement e = read.getCigar().getCigarElement(read.getCigarLength()-1); CigarElement e = read.getCigar().getCigarElement(read.getCigarLength() - 1);
if ( e.getOperator() == CigarOperator.I ) if ( e.getOperator() == CigarOperator.I )
return e.getLength(); return e.getLength();
else else
@ -622,6 +334,7 @@ public class ReadUtils {
return ReadAndIntervalOverlap.OVERLAP_RIGHT; return ReadAndIntervalOverlap.OVERLAP_RIGHT;
} }
@Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"}) @Ensures({"result >= read.getUnclippedStart()", "result <= read.getUnclippedEnd() || readIsEntirelyInsertion(read)"})
public static int getRefCoordSoftUnclippedStart(GATKSAMRecord read) { public static int getRefCoordSoftUnclippedStart(GATKSAMRecord read) {
int start = read.getUnclippedStart(); int start = read.getUnclippedStart();
@ -803,7 +516,13 @@ public class ReadUtils {
return referenceCoordinate >= read.getAlignmentStart() && referenceCoordinate <= read.getAlignmentEnd(); return referenceCoordinate >= read.getAlignmentStart() && referenceCoordinate <= read.getAlignmentEnd();
} }
private static boolean readIsEntirelyInsertion(GATKSAMRecord read) { /**
* Is this read all insertion?
*
* @param read
* @return whether or not the only element in the cigar string is an Insertion
*/
public static boolean readIsEntirelyInsertion(GATKSAMRecord read) {
for (CigarElement cigarElement : read.getCigar().getCigarElements()) { for (CigarElement cigarElement : read.getCigar().getCigarElements()) {
if (cigarElement.getOperator() != CigarOperator.INSERTION) if (cigarElement.getOperator() != CigarOperator.INSERTION)
return false; return false;