Merge pull request #1111 from jsilter/overclippedreadfilter_endsoption
Add additional option to OverclippedReadFilter
This commit is contained in:
commit
69fd4af15a
|
|
@ -40,8 +40,8 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
* This filter is intended to filter out reads that are potentially from foreign organisms.
|
||||
* From experience with sequencing of human DNA we have found cases of contamination by bacterial
|
||||
* organisms; the symptoms of such contamination are a class of reads with only a small number
|
||||
* of aligned bases and additionally many soft-clipped bases on both ends. This filter is intended
|
||||
* to remove such reads.
|
||||
* of aligned bases and additionally many soft-clipped bases. This filter is intended
|
||||
* to remove such reads. Consecutive soft-clipped blocks are treated as a single block
|
||||
* </p>
|
||||
*
|
||||
*/
|
||||
|
|
@ -50,25 +50,31 @@ public class OverclippedReadFilter extends ReadFilter {
|
|||
@Argument(fullName = "filter_is_too_short_value", shortName = "filterTooShort", doc = "Value for which reads with less than this number of aligned bases is considered too short", required = false)
|
||||
int tooShort = 30;
|
||||
|
||||
@Argument(fullName = "do_not_require_softclips_both_ends", shortName = "NoRequireSCBothEnds", doc = "Allow a read to be filtered out based on having only 1 soft-clipped block. By default, both ends must have a soft-clipped block, setting this flag requires only 1 soft-clipped block.", required = false)
|
||||
Boolean doNotRequireSoftclipsOnBothEnds = false;
|
||||
|
||||
|
||||
public boolean filterOut(final SAMRecord read) {
|
||||
boolean sawLeadingSoftclip = false;
|
||||
boolean sawAlignedBase = false;
|
||||
int alignedLength = 0;
|
||||
int softClipBlocks = 0;
|
||||
int minSoftClipBlocks = doNotRequireSoftclipsOnBothEnds ? 1 : 2;
|
||||
CigarOperator lastOperator = null;
|
||||
|
||||
for ( final CigarElement element : read.getCigar().getCigarElements() ) {
|
||||
if ( element.getOperator() == CigarOperator.S ) {
|
||||
if ( sawAlignedBase ) // if this is true then we must also have seen a leading soft-clip
|
||||
return (alignedLength < tooShort);
|
||||
sawLeadingSoftclip = true;
|
||||
//Treat consecutive S blocks as a single one
|
||||
if(lastOperator != CigarOperator.S){
|
||||
softClipBlocks += 1;
|
||||
}
|
||||
|
||||
} else if ( element.getOperator().consumesReadBases() ) { // M, I, X, and EQ (S was already accounted for above)
|
||||
if ( !sawLeadingSoftclip )
|
||||
return false;
|
||||
sawAlignedBase = true;
|
||||
alignedLength += element.getLength();
|
||||
}
|
||||
lastOperator = element.getOperator();
|
||||
}
|
||||
|
||||
return false;
|
||||
return(alignedLength < tooShort && softClipBlocks >= minSoftClipBlocks);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -42,11 +42,12 @@ import java.util.*;
|
|||
public class OverclippedReadFilterUnitTest extends ReadFilterTest {
|
||||
|
||||
@Test(enabled = true, dataProvider= "OverclippedDataProvider")
|
||||
public void testOverclippedFilter(final String cigarString, final boolean expectedResult) {
|
||||
public void testOverclippedFilter(final String cigarString, boolean doNotRequireSoftclipsOnBothEnds, final boolean expectedResult) {
|
||||
|
||||
final OverclippedReadFilter filter = new OverclippedReadFilter();
|
||||
final SAMRecord read = buildSAMRecord(cigarString);
|
||||
Assert.assertEquals(filter.filterOut(read), expectedResult, cigarString);
|
||||
final OverclippedReadFilter filter = new OverclippedReadFilter();
|
||||
filter.doNotRequireSoftclipsOnBothEnds = doNotRequireSoftclipsOnBothEnds;
|
||||
final SAMRecord read = buildSAMRecord(cigarString);
|
||||
Assert.assertEquals(filter.filterOut(read), expectedResult, cigarString);
|
||||
}
|
||||
|
||||
private SAMRecord buildSAMRecord(final String cigarString) {
|
||||
|
|
@ -58,20 +59,47 @@ public class OverclippedReadFilterUnitTest extends ReadFilterTest {
|
|||
public Iterator<Object[]> overclippedDataProvider() {
|
||||
final List<Object[]> result = new LinkedList<Object[]>();
|
||||
|
||||
result.add(new Object[] { "1S10M1S", true });
|
||||
result.add(new Object[] { "1S10X1S", true });
|
||||
result.add(new Object[] { "1H1S10M1S1H", true });
|
||||
result.add(new Object[] { "1S40M1S", false });
|
||||
result.add(new Object[] { "1S40X1S", false });
|
||||
result.add(new Object[] { "1H10M1S", false });
|
||||
result.add(new Object[] { "1S10M1H", false });
|
||||
result.add(new Object[] { "10M1S", false });
|
||||
result.add(new Object[] { "1S10M", false });
|
||||
result.add(new Object[] { "1S10M10D10M1S", true });
|
||||
result.add(new Object[] { "1S1M40I1S", false });
|
||||
result.add(new Object[] { "1S10I1S", true });
|
||||
result.add(new Object[] { "1S40I1S", false });
|
||||
result.add(new Object[] { "1S10M1S", false, true });
|
||||
result.add(new Object[] { "1S10X1S", false, true });
|
||||
result.add(new Object[] { "1H1S10M1S1H", false, true });
|
||||
result.add(new Object[] { "1S40M1S", false, false});
|
||||
result.add(new Object[] { "1S40X1S", false, false });
|
||||
result.add(new Object[] { "1H10M1S", false, false});
|
||||
result.add(new Object[] { "1S10M1H", false, false});
|
||||
|
||||
result.add(new Object[] { "10M1S", false, false});
|
||||
result.add(new Object[] { "1S10M", false, false});
|
||||
|
||||
result.add(new Object[] { "10M1S", true, true});
|
||||
result.add(new Object[] { "1S10M", true, true});
|
||||
|
||||
result.add(new Object[] { "1S10M10D10M1S", false, true });
|
||||
result.add(new Object[] { "1S1M40I1S", false, false });
|
||||
|
||||
result.add(new Object[] { "1S10I1S", false, true });
|
||||
result.add(new Object[] { "1S40I1S", false, false });
|
||||
result.add(new Object[] { "1S40I1S", true, false });
|
||||
|
||||
result.add(new Object[] { "25S40I25M", true, false });
|
||||
|
||||
//Read is too short once soft-clipping removed
|
||||
result.add(new Object[] { "25S25M", true, true });
|
||||
result.add(new Object[] { "25S25X", true, true });
|
||||
result.add(new Object[] { "25S25H", true, true });
|
||||
result.add(new Object[] { "25S25H", false, false });
|
||||
|
||||
result.add(new Object[] { "25S25M25S", false, true });
|
||||
result.add(new Object[] { "25M25S", true, true });
|
||||
result.add(new Object[] { "25S25M", true, true });
|
||||
|
||||
result.add(new Object[] { "25S35S", true, true });
|
||||
|
||||
//Read long enough even with soft clipping removed
|
||||
result.add(new Object[] { "25S35M25S", true, false });
|
||||
result.add(new Object[] { "35M25S", true, false });
|
||||
result.add(new Object[] { "25S35M", true, false });
|
||||
|
||||
return result.iterator();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue