diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 6279e0061..c2e69ee2d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -4,6 +4,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; +import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.ArrayList; @@ -38,14 +39,30 @@ public class ActiveRegion implements HasGenomeLocation { fullExtentReferenceLoc = fullExtentReferenceLoc.union( genomeLocParser.createGenomeLoc( read ) ); reads.add( read ); } + + public void hardClipToActiveRegion() { + final ArrayList clippedReads = ReadClipper.hardClipToRegion( reads, activeRegionLoc.getStart(), activeRegionLoc.getStop() ); + reads.clear(); + reads.addAll(clippedReads); + } public ArrayList getReads() { return reads; } - public byte[] getReference( final IndexedFastaSequenceFile referenceReader ) { - return getReference( referenceReader, 0 ); + public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) { + return getActiveRegionReference(referenceReader, 0); } - public byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { + public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { + return referenceReader.getSubsequenceAt( activeRegionLoc.getContig(), + Math.max(1, activeRegionLoc.getStart() - padding), + Math.min(referenceReader.getSequenceDictionary().getSequence(activeRegionLoc.getContig()).getSequenceLength(), activeRegionLoc.getStop() + padding) ).getBases(); + } + + public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) { + return getFullReference(referenceReader, 0); + } + + public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { return referenceReader.getSubsequenceAt( fullExtentReferenceLoc.getContig(), Math.max(1, fullExtentReferenceLoc.getStart() - padding), Math.min(referenceReader.getSequenceDictionary().getSequence(fullExtentReferenceLoc.getContig()).getSequenceLength(), fullExtentReferenceLoc.getStop() + padding) ).getBases(); diff --git a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java index 7a664bd61..1eab43256 100644 --- a/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java +++ b/public/java/src/org/broadinstitute/sting/utils/clipping/ReadClipper.java @@ -312,6 +312,42 @@ public class ReadClipper { } + /** + * Hard clip the read to the variable region (from refStart to refStop) + * + * @param read the read to be clipped + * @param refStart the beginning of the variant region (inclusive) + * @param refStop the end of the variant region (inclusive) + * @return the read hard clipped to the variant region + */ + public static GATKSAMRecord hardClipToRegion( final GATKSAMRecord read, final int refStart, final int refStop ) { + final int start = read.getAlignmentStart(); + final int stop = read.getAlignmentEnd(); + + // check if the read is contained in region + if (start <= refStop && stop >= refStart) { + if (start < refStart && stop > refStop) + return hardClipBothEndsByReferenceCoordinates(read, refStart - 1, refStop + 1); + else if (start < refStart) + return hardClipByReferenceCoordinatesLeftTail(read, refStart - 1); + else if (stop > refStop) + return hardClipByReferenceCoordinatesRightTail(read, refStop + 1); + return read; + } else + return GATKSAMRecord.emptyRead(read); + + } + public static ArrayList hardClipToRegion( final ArrayList reads, final int refStart, final int refStop ) { + final ArrayList returnList = new ArrayList( reads.size() ); + for( final GATKSAMRecord read : reads ) { + final GATKSAMRecord clippedRead = hardClipToRegion( read, refStart, refStop ); + if( !clippedRead.isEmpty() ) { + returnList.add( clippedRead ); + } + } + return returnList; + } + /** * Checks if a read contains adaptor sequences. If it does, hard clips them out. *