Merge pull request #242 from broadinstitute/vrr_N_cigar_error_and_override_option
Reads with N operator in CIGAR now result in a User exception. Option to filter them out is provided.
This commit is contained in:
commit
2a935374f3
|
|
@ -288,9 +288,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
@Test
|
||||
public void testNsInCigar() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
|
||||
Arrays.asList("2ae3fd39c53a6954d32faed8703adfe8"));
|
||||
UserException.UnsupportedCigarOperatorException.class);
|
||||
|
||||
executeTest("test calling on reads with Ns in CIGAR", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,8 @@ public class ValidationExclusion {
|
|||
// our validation options
|
||||
|
||||
public enum TYPE {
|
||||
ALLOW_N_CIGAR_READS, // ignore the presence of N operators in CIGARs: do not blow up and process reads that contain one or more N operators.
|
||||
// This exclusion does not have effect on reads that get filtered {@see MalformedReadFilter}.
|
||||
ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard
|
||||
ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set
|
||||
NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file
|
||||
|
|
|
|||
|
|
@ -25,14 +25,16 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.SAMTagUtil;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
/**
|
||||
* Filter out malformed reads.
|
||||
*
|
||||
|
|
@ -40,20 +42,46 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class MalformedReadFilter extends ReadFilter {
|
||||
|
||||
|
||||
private static final String FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME = "filter_reads_with_N_cigar" ;
|
||||
|
||||
private SAMFileHeader header;
|
||||
|
||||
@Argument(fullName = FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME, shortName = "filterRNC", doc = "filter out reads with CIGAR containing the N operator, instead of stop processing and report an error.", required = false)
|
||||
boolean filterReadsWithNCigar = false;
|
||||
|
||||
|
||||
@Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required = false)
|
||||
boolean filterMismatchingBaseAndQuals = false;
|
||||
|
||||
@Argument(fullName = "filter_bases_not_stored", shortName = "filterNoBases", doc = "if a read has no stored bases (i.e. a '*'), filter out the read instead of blowing up.", required = false)
|
||||
boolean filterBasesNotStored = false;
|
||||
|
||||
/**
|
||||
* Indicates the applicable validation exclusions
|
||||
*/
|
||||
private boolean allowNCigars;
|
||||
|
||||
@Override
|
||||
public void initialize(GenomeAnalysisEngine engine) {
|
||||
this.header = engine.getSAMFileHeader();
|
||||
public void initialize(final GenomeAnalysisEngine engine) {
|
||||
header = engine.getSAMFileHeader();
|
||||
ValidationExclusion validationExclusions = null;
|
||||
final SAMDataSource rds = engine.getReadsDataSource();
|
||||
if (rds != null) {
|
||||
final ReadProperties rps = rds.getReadsInfo();
|
||||
if (rps != null) {
|
||||
validationExclusions = rps.getValidationExclusionList();
|
||||
}
|
||||
}
|
||||
if (validationExclusions == null) {
|
||||
allowNCigars = false;
|
||||
} else {
|
||||
allowNCigars = validationExclusions.contains(ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean filterOut(SAMRecord read) {
|
||||
public boolean filterOut(final SAMRecord read) {
|
||||
// slowly changing the behavior to blow up first and filtering out if a parameter is explicitly provided
|
||||
return !checkInvalidAlignmentStart(read) ||
|
||||
!checkInvalidAlignmentEnd(read) ||
|
||||
|
|
@ -61,7 +89,8 @@ public class MalformedReadFilter extends ReadFilter {
|
|||
!checkHasReadGroup(read) ||
|
||||
!checkMismatchingBasesAndQuals(read, filterMismatchingBaseAndQuals) ||
|
||||
!checkCigarDisagreesWithAlignment(read) ||
|
||||
!checkSeqStored(read, filterBasesNotStored);
|
||||
!checkSeqStored(read, filterBasesNotStored) ||
|
||||
!checkCigarIsSupported(read,filterReadsWithNCigar,allowNCigars);
|
||||
}
|
||||
|
||||
private static boolean checkHasReadGroup(final SAMRecord read) {
|
||||
|
|
@ -80,7 +109,7 @@ public class MalformedReadFilter extends ReadFilter {
|
|||
* @param read The read to validate.
|
||||
* @return true if read start is valid, false otherwise.
|
||||
*/
|
||||
private static boolean checkInvalidAlignmentStart( SAMRecord read ) {
|
||||
private static boolean checkInvalidAlignmentStart(final SAMRecord read ) {
|
||||
// read is not flagged as 'unmapped', but alignment start is NO_ALIGNMENT_START
|
||||
if( !read.getReadUnmappedFlag() && read.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START )
|
||||
return false;
|
||||
|
|
@ -95,7 +124,7 @@ public class MalformedReadFilter extends ReadFilter {
|
|||
* @param read The read to validate.
|
||||
* @return true if read end is valid, false otherwise.
|
||||
*/
|
||||
private static boolean checkInvalidAlignmentEnd( SAMRecord read ) {
|
||||
private static boolean checkInvalidAlignmentEnd(final SAMRecord read ) {
|
||||
// Alignment aligns to negative number of bases in the reference.
|
||||
if( !read.getReadUnmappedFlag() && read.getAlignmentEnd() != -1 && (read.getAlignmentEnd()-read.getAlignmentStart()+1)<0 )
|
||||
return false;
|
||||
|
|
@ -108,11 +137,11 @@ public class MalformedReadFilter extends ReadFilter {
|
|||
* @param read The read to verify.
|
||||
* @return true if alignment agrees with header, false othrewise.
|
||||
*/
|
||||
private static boolean checkAlignmentDisagreesWithHeader( SAMFileHeader header, SAMRecord read ) {
|
||||
private static boolean checkAlignmentDisagreesWithHeader(final SAMFileHeader header, final SAMRecord read ) {
|
||||
// Read is aligned to nonexistent contig
|
||||
if( read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && read.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START )
|
||||
return false;
|
||||
SAMSequenceRecord contigHeader = header.getSequence( read.getReferenceIndex() );
|
||||
final SAMSequenceRecord contigHeader = header.getSequence( read.getReferenceIndex() );
|
||||
// Read is aligned to a point after the end of the contig
|
||||
if( !read.getReadUnmappedFlag() && read.getAlignmentStart() > contigHeader.getSequenceLength() )
|
||||
return false;
|
||||
|
|
@ -124,7 +153,7 @@ public class MalformedReadFilter extends ReadFilter {
|
|||
* @param read The read to validate.
|
||||
* @return true if cigar agrees with alignment, false otherwise.
|
||||
*/
|
||||
private static boolean checkCigarDisagreesWithAlignment(SAMRecord read) {
|
||||
private static boolean checkCigarDisagreesWithAlignment(final SAMRecord read) {
|
||||
// Read has a valid alignment start, but the CIGAR string is empty
|
||||
if( !read.getReadUnmappedFlag() &&
|
||||
read.getAlignmentStart() != -1 &&
|
||||
|
|
@ -134,13 +163,72 @@ public class MalformedReadFilter extends ReadFilter {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for unsupported CIGAR operators.
|
||||
* Currently the N operator is not supported.
|
||||
* @param read The read to validate.
|
||||
* @param filterReadsWithNCigar whether the offending read should just
|
||||
* be silently filtered or not.
|
||||
* @param allowNCigars whether reads that contain N operators in their CIGARs
|
||||
* can be processed or an exception should be thrown instead.
|
||||
* @throws UserException.UnsupportedCigarOperatorException
|
||||
* if {@link #filterReadsWithNCigar} is <code>false</code> and
|
||||
* the input read has some unsupported operation.
|
||||
* @return <code>true</code> if the read CIGAR operations are
|
||||
* fully supported, otherwise <code>false</code>, as long as
|
||||
* no exception has been thrown.
|
||||
*/
|
||||
private static boolean checkCigarIsSupported(final SAMRecord read, final boolean filterReadsWithNCigar, final boolean allowNCigars) {
|
||||
if( containsNOperator(read)) {
|
||||
if (! filterReadsWithNCigar && !allowNCigars) {
|
||||
throw new UserException.UnsupportedCigarOperatorException(
|
||||
CigarOperator.N,read,
|
||||
"Perhaps you are"
|
||||
+ " trying to use RNA-Seq data?"
|
||||
+ " While we are currently actively working to"
|
||||
+ " support this data type unfortunately the"
|
||||
+ " GATK cannot be used with this data in its"
|
||||
+ " current form. You have the option of either"
|
||||
+ " filtering out all reads with operator "
|
||||
+ CigarOperator.N + " in their CIGAR string"
|
||||
+ " (please add --"
|
||||
+ FILTER_READS_WITH_N_CIGAR_ARGUMENT_FULL_NAME
|
||||
+ " to your command line) or"
|
||||
+ " assume the risk of processing those reads as they"
|
||||
+ " are including the pertinent unsafe flag (please add -U"
|
||||
+ ' ' + ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS
|
||||
+ " to your command line). Notice however that if you were"
|
||||
+ " to choose the latter, an unspecified subset of the"
|
||||
+ " analytical outputs of an unspecified subset of the tools"
|
||||
+ " will become unpredictable. Consequently the GATK team"
|
||||
+ " might well not be able to provide you with the usual support"
|
||||
+ " with any issue regarding any output");
|
||||
}
|
||||
return ! filterReadsWithNCigar;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private static boolean containsNOperator(final SAMRecord read) {
|
||||
final Cigar cigar = read.getCigar();
|
||||
if (cigar == null) {
|
||||
return false;
|
||||
}
|
||||
for (final CigarElement ce : cigar.getCigarElements()) {
|
||||
if (ce.getOperator() == CigarOperator.N) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the read has the same number of bases and base qualities
|
||||
* @param read the read to validate
|
||||
* @return true if they have the same number. False otherwise.
|
||||
*/
|
||||
private static boolean checkMismatchingBasesAndQuals(SAMRecord read, boolean filterMismatchingBaseAndQuals) {
|
||||
boolean result;
|
||||
private static boolean checkMismatchingBasesAndQuals(final SAMRecord read, final boolean filterMismatchingBaseAndQuals) {
|
||||
final boolean result;
|
||||
if (read.getReadLength() == read.getBaseQualities().length)
|
||||
result = true;
|
||||
else if (filterMismatchingBaseAndQuals)
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.exceptions;
|
||||
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
|
|
@ -87,6 +88,19 @@ public class UserException extends ReviewedStingException {
|
|||
}
|
||||
}
|
||||
|
||||
public static class UnsupportedCigarOperatorException extends UserException {
|
||||
public UnsupportedCigarOperatorException(final CigarOperator co, final SAMRecord read, final String message) {
|
||||
super(String.format(
|
||||
"Unsupported CIGAR operator %s in read %s at %s:%d. %s",
|
||||
co,
|
||||
read.getReadName(),
|
||||
read.getReferenceName(),
|
||||
read.getAlignmentStart(),
|
||||
message));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static class MalformedGenomeLoc extends UserException {
|
||||
public MalformedGenomeLoc(String message, GenomeLoc loc) {
|
||||
super(String.format("Badly formed genome loc: %s: %s", message, loc));
|
||||
|
|
|
|||
|
|
@ -131,6 +131,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
|
|||
final String root = "-T ErrorThrowing -R " + exampleFASTA;
|
||||
final String args = root + cfg.args + " -E " + cfg.expectedException.getSimpleName();
|
||||
WalkerTestSpec spec = new WalkerTestSpec(args, 0, cfg.expectedException);
|
||||
|
||||
executeTest(cfg.toString(), spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,77 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
|
||||
/**
|
||||
* Tests for the {@link MalformedReadFilter} when the unsafe flag
|
||||
* {@link ValidationExclusion.TYPE#ALLOW_N_CIGAR_READS} is set.
|
||||
*
|
||||
* @author Valentin Ruano-Rubio
|
||||
* @since 6/6/13
|
||||
*/
|
||||
public class AllowNCigarMalformedReadFilterUnitTest extends MalformedReadFilterUnitTest {
|
||||
|
||||
|
||||
@Override
|
||||
protected ValidationExclusion composeValidationExclusion() {
|
||||
return new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALLOW_N_CIGAR_READS));
|
||||
}
|
||||
|
||||
|
||||
@Test(enabled = true,
|
||||
dataProvider= "UnsupportedCigarOperatorDataProvider")
|
||||
@CigarOperatorTest(CigarOperatorTest.Outcome.IGNORE)
|
||||
public void testCigarNOperatorFilterIgnore(final String cigarString) {
|
||||
|
||||
final MalformedReadFilter filter = buildMalformedReadFilter(false);
|
||||
final SAMRecord nContainingCigarRead = buildSAMRecord(cigarString);
|
||||
Assert.assertFalse(filter.filterOut(nContainingCigarRead),
|
||||
"filters out N containing Cigar when it should ignore the fact");
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
@Override
|
||||
public void testCigarNOperatorFilterException(final String cigarString) {
|
||||
// Nothing to do here.
|
||||
// Just deactivates the parents test case.
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -25,11 +25,25 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.TextCigarCodec;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException.UnsupportedCigarOperatorException;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -38,14 +52,14 @@ import org.testng.annotations.Test;
|
|||
* @author Eric Banks
|
||||
* @since 3/14/13
|
||||
*/
|
||||
public class MalformedReadFilterUnitTest {
|
||||
public class MalformedReadFilterUnitTest extends ReadFilterTest {
|
||||
|
||||
//////////////////////////////////////
|
||||
// Test the checkSeqStored() method //
|
||||
//////////////////////////////////////
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testcheckSeqStored () {
|
||||
public void testCheckSeqStored () {
|
||||
|
||||
final GATKSAMRecord goodRead = ArtificialSAMUtils.createArtificialRead(new byte[]{(byte)'A'}, new byte[]{(byte)'A'}, "1M");
|
||||
final GATKSAMRecord badRead = ArtificialSAMUtils.createArtificialRead(new byte[]{}, new byte[]{}, "1M");
|
||||
|
|
@ -59,4 +73,174 @@ public class MalformedReadFilterUnitTest {
|
|||
Assert.assertTrue(false, "We should have exceptioned out in the previous line");
|
||||
} catch (UserException e) { }
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider= "UnsupportedCigarOperatorDataProvider")
|
||||
@CigarOperatorTest(CigarOperatorTest.Outcome.FILTER)
|
||||
public void testCigarNOperatorFilterTruePositive(String cigarString) {
|
||||
|
||||
final MalformedReadFilter filter = buildMalformedReadFilter(true);
|
||||
final SAMRecord nContainingCigarRead = buildSAMRecord(cigarString);
|
||||
Assert.assertTrue(filter.filterOut(nContainingCigarRead),
|
||||
" Did not filtered out a N containing CIGAR read");
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider= "UnsupportedCigarOperatorDataProvider")
|
||||
@CigarOperatorTest(CigarOperatorTest.Outcome.ACCEPT)
|
||||
public void testCigarNOperatorFilterTrueNegative(String cigarString) {
|
||||
|
||||
final MalformedReadFilter filter = buildMalformedReadFilter(true);
|
||||
final SAMRecord nonNContainingCigarRead = buildSAMRecord(cigarString);
|
||||
Assert.assertFalse(filter.filterOut(nonNContainingCigarRead),
|
||||
" Filtered out a non-N containing CIGAR read");
|
||||
}
|
||||
|
||||
@Test(enabled = true,
|
||||
expectedExceptions = UnsupportedCigarOperatorException.class,
|
||||
dataProvider= "UnsupportedCigarOperatorDataProvider")
|
||||
@CigarOperatorTest(CigarOperatorTest.Outcome.EXCEPTION)
|
||||
public void testCigarNOperatorFilterException(final String cigarString) {
|
||||
|
||||
final MalformedReadFilter filter = buildMalformedReadFilter(false);
|
||||
final SAMRecord nContainingCigarRead = buildSAMRecord(cigarString);
|
||||
|
||||
filter.filterOut(nContainingCigarRead);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider="UnsupportedCigarOperatorDataProvider")
|
||||
@CigarOperatorTest(CigarOperatorTest.Outcome.ACCEPT)
|
||||
public void testCigarNOperatorFilterControl(final String cigarString) {
|
||||
|
||||
final MalformedReadFilter filter = buildMalformedReadFilter(false);
|
||||
final SAMRecord nonNContainingCigarRead = buildSAMRecord(cigarString);
|
||||
|
||||
Assert.assertFalse(filter.filterOut(nonNContainingCigarRead));
|
||||
}
|
||||
|
||||
protected SAMRecord buildSAMRecord(final String cigarString) {
|
||||
final Cigar nContainingCigar = TextCigarCodec.getSingleton().decode(cigarString);
|
||||
return this.createRead(nContainingCigar, 1, 0, 10);
|
||||
}
|
||||
|
||||
protected MalformedReadFilter buildMalformedReadFilter(final boolean filterRNO) {
|
||||
return buildMalformedReadFiter(filterRNO,new ValidationExclusion.TYPE[] {});
|
||||
}
|
||||
|
||||
protected MalformedReadFilter buildMalformedReadFiter(boolean filterRNO, final ValidationExclusion.TYPE... excl) {
|
||||
final ValidationExclusion ve = new ValidationExclusion(Arrays.asList(excl));
|
||||
|
||||
final MalformedReadFilter filter = new MalformedReadFilter();
|
||||
|
||||
final SAMFileHeader h = getHeader();
|
||||
final SAMDataSource ds = getDataSource();
|
||||
|
||||
final GenomeAnalysisEngine gae = new GenomeAnalysisEngine() {
|
||||
@Override
|
||||
public SAMFileHeader getSAMFileHeader() {
|
||||
return h;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SAMDataSource getReadsDataSource() {
|
||||
return ds;
|
||||
}
|
||||
};
|
||||
filter.initialize(gae);
|
||||
filter.filterReadsWithNCigar = filterRNO;
|
||||
return filter;
|
||||
}
|
||||
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.METHOD)
|
||||
@Inherited
|
||||
protected @interface CigarOperatorTest {
|
||||
|
||||
enum Outcome {
|
||||
ANY,ACCEPT,FILTER,EXCEPTION,IGNORE;
|
||||
|
||||
public boolean appliesTo (String cigar) {
|
||||
boolean hasN = cigar.indexOf('N') != -1;
|
||||
switch (this) {
|
||||
case ANY: return true;
|
||||
case ACCEPT: return !hasN;
|
||||
case IGNORE: return hasN;
|
||||
case FILTER:
|
||||
case EXCEPTION:
|
||||
default:
|
||||
return hasN;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Outcome value() default Outcome.ANY;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cigar test data for unsupported operator test.
|
||||
* Each element of this array corresponds to a test case. In turn the first element of the test case array is the
|
||||
* Cigar string for that test case and the second indicates whether it should be filtered due to the presence of a
|
||||
* unsupported operator
|
||||
*/
|
||||
private static final String[] TEST_CIGARS = {
|
||||
"101M10D20I10M",
|
||||
"6M14N5M",
|
||||
"1N",
|
||||
"101M",
|
||||
"110N",
|
||||
"2N4M",
|
||||
"4M2N",
|
||||
"3M1I1M",
|
||||
"1M2I2M",
|
||||
"1M10N1I1M",
|
||||
"1M1I1D",
|
||||
"11N12M1I34M12N"
|
||||
};
|
||||
|
||||
@DataProvider(name= "UnsupportedCigarOperatorDataProvider")
|
||||
public Iterator<Object[]> unsupportedOperatorDataProvider(final Method testMethod) {
|
||||
final CigarOperatorTest a = resolveCigarOperatorTestAnnotation(testMethod);
|
||||
final List<Object[]> result = new LinkedList<Object[]>();
|
||||
for (final String cigarString : TEST_CIGARS) {
|
||||
if (a == null || a.value().appliesTo(cigarString)) {
|
||||
result.add(new Object[] { cigarString });
|
||||
}
|
||||
}
|
||||
return result.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the most specific {@link CigarOperatorTest} annotation for the
|
||||
* signature of the test method provided.
|
||||
* <p/>
|
||||
* This in-house implementation is required due to the fact that method
|
||||
* annotations do not have inheritance.
|
||||
*
|
||||
* @param m targeted test method.
|
||||
* @return <code>null</code> if there is no {@link CigarOperatorTest}
|
||||
* annotation in this or overridden methods.
|
||||
*/
|
||||
private CigarOperatorTest resolveCigarOperatorTestAnnotation(final Method m) {
|
||||
CigarOperatorTest res = m.getAnnotation(CigarOperatorTest.class);
|
||||
if (res != null) {
|
||||
return res;
|
||||
}
|
||||
Class<?> c = this.getClass();
|
||||
Class<?> p = c.getSuperclass();
|
||||
while (p != null && p != Object.class) {
|
||||
try {
|
||||
final Method met = p.getDeclaredMethod(m.getName(),
|
||||
m.getParameterTypes());
|
||||
res = met.getAnnotation(CigarOperatorTest.class);
|
||||
if (res != null) {
|
||||
break;
|
||||
}
|
||||
} catch (NoSuchMethodException e) {
|
||||
// Its ok; nothing to do here, just keep looking.
|
||||
}
|
||||
c = p;
|
||||
p = c.getSuperclass();
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,370 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.annotations.AfterClass;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Class ReadBaseTest
|
||||
* <p/>
|
||||
* This is the base test class for read filter test classes. All read
|
||||
* filter test cases should extend from this
|
||||
* class; it sets ups a header mock up to test read filtering.
|
||||
*
|
||||
* Feel free to override non-final method to modify the behavior
|
||||
* (i.e. change how read group id are formatted, or complete a header).
|
||||
*
|
||||
* <p/>
|
||||
* You can statically determine the number of read-group involved
|
||||
* in the test by calling {@link #ReadFilterTest(int)} in you constructor.
|
||||
* <p/>
|
||||
*
|
||||
* Notice that the same header object is shared by all test and
|
||||
* it is initialized by Junit (calling {@link #beforeClass()}.
|
||||
*
|
||||
* @author Valentin Ruano Rubio
|
||||
* @date May 23, 2013
|
||||
*/
|
||||
public class ReadFilterTest extends BaseTest {
|
||||
|
||||
private static final int DEFAULT_READ_GROUP_COUNT = 5;
|
||||
private static final int DEFAULT_READER_COUNT = 1;
|
||||
private static final String DEFAULT_READ_GROUP_PREFIX = "ReadGroup";
|
||||
private static final String DEFAULT_PLATFORM_UNIT_PREFIX = "Lane";
|
||||
private static final String DEFAULT_SAMPLE_NAME_PREFIX = "Sample";
|
||||
private static final String DEFAULT_PLATFORM_PREFIX = "Platform";
|
||||
private static final int DEFAULT_CHROMOSOME_COUNT = 1;
|
||||
private static final int DEFAULT_CHROMOSOME_START_INDEX = 1;
|
||||
private static final int DEFAULT_CHROMOSOME_SIZE = 1000;
|
||||
private static final String DEFAULT_SAM_FILE_FORMAT = "readfile-%3d.bam";
|
||||
|
||||
private final int groupCount;
|
||||
|
||||
private SAMFileHeader header;
|
||||
|
||||
private SAMDataSource dataSource;
|
||||
|
||||
/**
|
||||
* Constructs a new read-filter test providing the number of read
|
||||
* groups in the file.
|
||||
*
|
||||
* @param groupCount number of read-group in the fictional SAM file,
|
||||
* must be equal or greater than 1.
|
||||
*/
|
||||
protected ReadFilterTest(final int groupCount) {
|
||||
if (groupCount < 1) {
|
||||
throw new IllegalArgumentException(
|
||||
"the read group count must at least be 1");
|
||||
}
|
||||
this.groupCount = groupCount;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the data source.
|
||||
*
|
||||
* @throws IllegalStateException if the data source was not initialized
|
||||
* invoking {@link #beforeClass()}
|
||||
* @return never <code>null</code>
|
||||
*/
|
||||
protected final SAMDataSource getDataSource() {
|
||||
checkDataSourceExists();
|
||||
return dataSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the mock-up SAM file header for testing.
|
||||
*
|
||||
* @throws IllegalStateException if the header was not initialized
|
||||
* invoking {@link #beforeClass()}
|
||||
* @return never <code>null</code>
|
||||
*/
|
||||
protected final SAMFileHeader getHeader() {
|
||||
checkHeaderExists();
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a read filter test with the default number of groups
|
||||
* ({@link #DEFAULT_READ_GROUP_COUNT}.
|
||||
*/
|
||||
public ReadFilterTest() {
|
||||
this(DEFAULT_READ_GROUP_COUNT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of read groups involved in the test
|
||||
* @return <code>1</code> or greater.
|
||||
*/
|
||||
protected final int getReadGroupCount() {
|
||||
return groupCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Composes the Id for the read group given its index.
|
||||
*
|
||||
* This methods must return a unique distinct ID for each possible index and
|
||||
* it must be the same value each time it is invoked.
|
||||
*
|
||||
* @param index the index of the targeted read group in the range
|
||||
* [1,{@link #getReadGroupCount()}]
|
||||
* @return never <code>null</code> and must be unique to each possible
|
||||
* read group index.
|
||||
*/
|
||||
protected String composeReadGroupId(final int index) {
|
||||
checkReadGroupIndex(index);
|
||||
return DEFAULT_READ_GROUP_PREFIX + index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Composes the Platform name for the read group given its index.
|
||||
*
|
||||
* This method must always return the same value give an index.
|
||||
*
|
||||
* @param index the index of the targeted read group in the range
|
||||
* [1,{@link #getReadGroupCount()}]
|
||||
* @return never <code>null</code>.
|
||||
*/
|
||||
protected String composePlatformName(final int index) {
|
||||
checkReadGroupIndex(index);
|
||||
return DEFAULT_PLATFORM_PREFIX + (((index-1)%2)+1);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Composes the Platform unit name for the read group given its index.
|
||||
*
|
||||
* @param index the index of the targeted read group in the range
|
||||
* [1,{@link #getReadGroupCount()}]
|
||||
* @return never <code>null</code>.
|
||||
*/
|
||||
protected String composePlatformUnitName(final int index) {
|
||||
checkReadGroupIndex(index);
|
||||
return DEFAULT_PLATFORM_UNIT_PREFIX + (((index-1)%3)+1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Checks the correctness of a given read group index.
|
||||
*
|
||||
* A correct index is any value in the range [1,{@link #getReadGroupCount()}].
|
||||
*
|
||||
* @param index the target index.
|
||||
* @throws IllegalArgumentException if the input index is not correct.
|
||||
*/
|
||||
protected final void checkReadGroupIndex(final int index) {
|
||||
checkIndex(index,groupCount,"read group");
|
||||
}
|
||||
|
||||
|
||||
private void checkIndex(final int index, final int max, CharSequence name) {
|
||||
if (index < 1 || index > max) {
|
||||
throw new IllegalArgumentException(
|
||||
name + " index ("
|
||||
+ index
|
||||
+ ") is out of bounds [1," + max + "]");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks whether the header was initialized.
|
||||
*
|
||||
* @throws IllegalStateException if the header was not yet initialized.
|
||||
*/
|
||||
protected final void checkHeaderExists() {
|
||||
if (header == null) {
|
||||
throw new IllegalArgumentException(
|
||||
"header has not been initialized;"
|
||||
+ " beforeClass() was not invoked");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether the data source was initialized.
|
||||
*
|
||||
* @throws IllegalStateException if the data source was not yet initialized.
|
||||
*/
|
||||
protected final void checkDataSourceExists() {
|
||||
if (header == null) {
|
||||
throw new IllegalArgumentException(
|
||||
"data source has not been initialized;"
|
||||
+ " beforeClass() was not invoked");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the ID for a read group given its index.
|
||||
*
|
||||
* @param index the index of the targeted read group in the range
|
||||
* [1,{@link #getReadGroupCount()}]
|
||||
* @return never <code>null</code> and must be unique to each
|
||||
* possible read group index.
|
||||
*/
|
||||
protected final String getReadGroupId(final int index) {
|
||||
checkReadGroupIndex(index);
|
||||
return getHeader().getReadGroups().get(index - 1).getReadGroupId();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the platform name for a read group given its index.
|
||||
*
|
||||
* @param group the index of the targeted read group in the range
|
||||
* [1,{@link #getReadGroupCount()}]
|
||||
* @return never <code>null</code>.
|
||||
*/
|
||||
protected final String getPlatformName(final int group) {
|
||||
checkReadGroupIndex(group);
|
||||
return getHeader().getReadGroups().get(group - 1).getPlatform();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the platform unit for a read group given its index.
|
||||
*
|
||||
* @param group the index of the targeted read group in the range
|
||||
* [1,{@link #getReadGroupCount()}]
|
||||
* @return never <code>null</code>.
|
||||
*/
|
||||
protected final String getPlatformUnit(final int group) {
|
||||
checkReadGroupIndex(group);
|
||||
return getHeader().getReadGroups().get(group - 1).getPlatformUnit();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Composes the mock up SAM file header.
|
||||
*
|
||||
* It must return an equivalent (equal) value each time it is invoked.
|
||||
*
|
||||
* @return never <code>null</code>.
|
||||
*/
|
||||
protected SAMFileHeader composeHeader() {
|
||||
|
||||
return ArtificialSAMUtils.createArtificialSamHeader(
|
||||
DEFAULT_CHROMOSOME_COUNT, DEFAULT_CHROMOSOME_START_INDEX,
|
||||
DEFAULT_CHROMOSOME_SIZE);
|
||||
}
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
|
||||
header = composeHeader();
|
||||
dataSource = composeDataSource();
|
||||
final List<String> readGroupIDs = new ArrayList<String>();
|
||||
final List<String> sampleNames = new ArrayList<String>();
|
||||
|
||||
for (int i = 1; i <= getReadGroupCount(); i++) {
|
||||
final String readGroupId = composeReadGroupId(i);
|
||||
readGroupIDs.add(readGroupId);
|
||||
sampleNames.add(readGroupId);
|
||||
}
|
||||
|
||||
ArtificialSAMUtils.createEnumeratedReadGroups(
|
||||
header, readGroupIDs, sampleNames);
|
||||
|
||||
for (int i = 1; i <= getReadGroupCount(); i++) {
|
||||
final String readGroupId = readGroupIDs.get(i-1);
|
||||
final SAMReadGroupRecord groupRecord = header.getReadGroup(readGroupId);
|
||||
groupRecord.setAttribute("PL", composePlatformName(i));
|
||||
groupRecord.setAttribute("PU", composePlatformUnitName(i));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected ValidationExclusion composeValidationExclusion() {
|
||||
return new ValidationExclusion();
|
||||
}
|
||||
|
||||
protected SAMDataSource composeDataSource() {
|
||||
checkHeaderExists();
|
||||
final Set<SAMReaderID> readerIDs = new HashSet<>(1);
|
||||
final ThreadAllocation ta = new ThreadAllocation();
|
||||
final Integer numFileHandles = 1; // I believe that any value would do but need to confirm.
|
||||
final boolean useOriginalBaseQualities = true;
|
||||
final SAMFileReader.ValidationStringency strictness = SAMFileReader.ValidationStringency.LENIENT;
|
||||
final Integer readBufferSize = 1; // not relevant.
|
||||
final DownsamplingMethod downsamplingMethod = DownsamplingMethod.NONE;
|
||||
final ValidationExclusion exclusionList = composeValidationExclusion();
|
||||
final Collection<ReadFilter> supplementalFilters = Collections.EMPTY_SET;
|
||||
final boolean includeReadsWithDeletionAtLoci = true;
|
||||
|
||||
final GenomeLocParser glp = new GenomeLocParser(header.getSequenceDictionary());
|
||||
final SAMDataSource res = new SAMDataSource(
|
||||
readerIDs,
|
||||
ta,
|
||||
numFileHandles,
|
||||
glp,
|
||||
useOriginalBaseQualities,
|
||||
strictness,
|
||||
readBufferSize,
|
||||
downsamplingMethod,
|
||||
exclusionList,
|
||||
supplementalFilters,
|
||||
includeReadsWithDeletionAtLoci);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public void afterClass() {
|
||||
header = null;
|
||||
dataSource = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a read record.
|
||||
*
|
||||
* @param cigar the new record CIGAR.
|
||||
* @param group the new record group index that must be in the range \
|
||||
* [1,{@link #getReadGroupCount()}]
|
||||
* @param reference the reference sequence index (0-based)
|
||||
* @param start the start position of the read alignment in the reference
|
||||
* (1-based)
|
||||
* @return never <code>null</code>
|
||||
*/
|
||||
protected SAMRecord createRead(final Cigar cigar, final int group, final int reference, final int start) {
|
||||
final SAMRecord record = ArtificialSAMUtils.createArtificialRead(cigar);
|
||||
record.setHeader(getHeader());
|
||||
record.setAlignmentStart(start);
|
||||
record.setReferenceIndex(reference);
|
||||
record.setAttribute(SAMTag.RG.toString(), getReadGroupId(group));
|
||||
return record;
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -26,13 +26,10 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
|
||||
|
|
@ -40,34 +37,7 @@ import java.util.List;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
||||
public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
||||
private static final int READ_GROUP_COUNT = 5;
|
||||
private static final String READ_GROUP_PREFIX = "ReadGroup";
|
||||
private static final String SAMPLE_NAME_PREFIX = "Sample";
|
||||
private static final String PLATFORM_PREFIX = "Platform";
|
||||
private static final String PLATFORM_UNIT_PREFIX = "Lane";
|
||||
private static SAMFileHeader header;
|
||||
|
||||
@BeforeClass
|
||||
public void beforeClass() {
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||
|
||||
List<String> readGroupIDs = new ArrayList<String>();
|
||||
List<String> sampleNames = new ArrayList<String>();
|
||||
|
||||
for (int i = 1; i <= READ_GROUP_COUNT; i++) {
|
||||
readGroupIDs.add(READ_GROUP_PREFIX + i);
|
||||
sampleNames.add(SAMPLE_NAME_PREFIX + i);
|
||||
}
|
||||
|
||||
ArtificialSAMUtils.createEnumeratedReadGroups(header, readGroupIDs, sampleNames);
|
||||
|
||||
for (int i = 1; i <= READ_GROUP_COUNT; i++) {
|
||||
SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + i);
|
||||
groupRecord.setAttribute("PL", PLATFORM_PREFIX + (((i-1)%2)+1));
|
||||
groupRecord.setAttribute("PU", PLATFORM_UNIT_PREFIX + (((i-1)%3)+1));
|
||||
}
|
||||
}
|
||||
public class ReadGroupBlackListFilterUnitTest extends ReadFilterTest {
|
||||
|
||||
@Test(expectedExceptions=ReviewedStingException.class)
|
||||
public void testBadFilter() {
|
||||
|
|
@ -88,14 +58,14 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testFilterReadGroup() {
|
||||
SAMRecord filteredRecord = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 1, 20);
|
||||
filteredRecord.setAttribute("RG", READ_GROUP_PREFIX + "1");
|
||||
SAMRecord filteredRecord = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, 1, 20);
|
||||
filteredRecord.setAttribute("RG", getReadGroupId(1));
|
||||
|
||||
SAMRecord unfilteredRecord = ArtificialSAMUtils.createArtificialRead(header, "readDos", 0, 2, 20);
|
||||
unfilteredRecord.setAttribute("RG", READ_GROUP_PREFIX + "2");
|
||||
SAMRecord unfilteredRecord = ArtificialSAMUtils.createArtificialRead(getHeader(), "readDos", 0, 2, 20);
|
||||
unfilteredRecord.setAttribute("RG", getReadGroupId(2));
|
||||
|
||||
List<String> filterList = new ArrayList<String>();
|
||||
filterList.add("RG:" + READ_GROUP_PREFIX + "1");
|
||||
filterList.add("RG:" + getReadGroupId(1));
|
||||
|
||||
ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList);
|
||||
Assert.assertTrue(filter.filterOut(filteredRecord));
|
||||
|
|
@ -104,14 +74,14 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testFilterPlatformUnit() {
|
||||
SAMRecord filteredRecord = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, 1, 20);
|
||||
filteredRecord.setAttribute("RG", READ_GROUP_PREFIX + "1");
|
||||
SAMRecord filteredRecord = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, 1, 20);
|
||||
filteredRecord.setAttribute("RG", getReadGroupId(1));
|
||||
|
||||
SAMRecord unfilteredRecord = ArtificialSAMUtils.createArtificialRead(header, "readDos", 0, 2, 20);
|
||||
unfilteredRecord.setAttribute("RG", READ_GROUP_PREFIX + "2");
|
||||
SAMRecord unfilteredRecord = ArtificialSAMUtils.createArtificialRead(getHeader(), "readDos", 0, 2, 20);
|
||||
unfilteredRecord.setAttribute("RG", getReadGroupId(2));
|
||||
|
||||
List<String> filterList = new ArrayList<String>();
|
||||
filterList.add("PU:" + PLATFORM_UNIT_PREFIX + "1");
|
||||
filterList.add("PU:" + getPlatformUnit(1));
|
||||
|
||||
ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList);
|
||||
Assert.assertTrue(filter.filterOut(filteredRecord));
|
||||
|
|
@ -123,18 +93,18 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
int recordsPerGroup = 3;
|
||||
List<SAMRecord> records = new ArrayList<SAMRecord>();
|
||||
int alignmentStart = 0;
|
||||
for (int x = 1; x <= READ_GROUP_COUNT; x++) {
|
||||
SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + x);
|
||||
for (int x = 1; x <= getReadGroupCount(); x++) {
|
||||
SAMReadGroupRecord groupRecord = getHeader().getReadGroup(getReadGroupId(x));
|
||||
for (int y = 1; y <= recordsPerGroup; y++) {
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, ++alignmentStart, 20);
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, ++alignmentStart, 20);
|
||||
record.setAttribute("RG", groupRecord.getReadGroupId());
|
||||
records.add(record);
|
||||
}
|
||||
}
|
||||
|
||||
List<String> filterList = new ArrayList<String>();
|
||||
filterList.add("RG:" + READ_GROUP_PREFIX + "1");
|
||||
filterList.add("RG:" + READ_GROUP_PREFIX + "3");
|
||||
filterList.add("RG:" + getReadGroupId(1));
|
||||
filterList.add("RG:" + getReadGroupId(3));
|
||||
|
||||
ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList);
|
||||
int filtered = 0;
|
||||
|
|
@ -153,7 +123,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
int filteredExpected = recordsPerGroup * 2;
|
||||
int unfilteredExpected = recordsPerGroup * (READ_GROUP_COUNT - 2);
|
||||
int unfilteredExpected = recordsPerGroup * (getReadGroupCount() - 2);
|
||||
Assert.assertEquals(filtered, filteredExpected, "Filtered");
|
||||
Assert.assertEquals(unfiltered, unfilteredExpected, "Uniltered");
|
||||
}
|
||||
|
|
@ -163,17 +133,17 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
int recordsPerGroup = 3;
|
||||
List<SAMRecord> records = new ArrayList<SAMRecord>();
|
||||
int alignmentStart = 0;
|
||||
for (int x = 1; x <= READ_GROUP_COUNT; x++) {
|
||||
SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + x);
|
||||
for (int x = 1; x <= getReadGroupCount(); x++) {
|
||||
SAMReadGroupRecord groupRecord = getHeader().getReadGroup(getReadGroupId(x));
|
||||
for (int y = 1; y <= recordsPerGroup; y++) {
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, ++alignmentStart, 20);
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, ++alignmentStart, 20);
|
||||
record.setAttribute("RG", groupRecord.getReadGroupId());
|
||||
records.add(record);
|
||||
}
|
||||
}
|
||||
|
||||
List<String> filterList = new ArrayList<String>();
|
||||
filterList.add("PU:" + PLATFORM_UNIT_PREFIX + "1");
|
||||
filterList.add("PU:" + getPlatformUnit(1));
|
||||
|
||||
ReadGroupBlackListFilter filter = new ReadGroupBlackListFilter(filterList);
|
||||
int filtered = 0;
|
||||
|
|
@ -202,10 +172,10 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
int recordsPerGroup = 3;
|
||||
List<SAMRecord> records = new ArrayList<SAMRecord>();
|
||||
int alignmentStart = 0;
|
||||
for (int x = 1; x <= READ_GROUP_COUNT; x++) {
|
||||
SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + x);
|
||||
for (int x = 1; x <= getReadGroupCount(); x++) {
|
||||
SAMReadGroupRecord groupRecord = getHeader().getReadGroup(getReadGroupId(x));
|
||||
for (int y = 1; y <= recordsPerGroup; y++) {
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, ++alignmentStart, 20);
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, ++alignmentStart, 20);
|
||||
record.setAttribute("RG", groupRecord.getReadGroupId());
|
||||
records.add(record);
|
||||
}
|
||||
|
|
@ -231,7 +201,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
int filteredExpected = recordsPerGroup * 2;
|
||||
int unfilteredExpected = recordsPerGroup * (READ_GROUP_COUNT - 2);
|
||||
int unfilteredExpected = recordsPerGroup * (getReadGroupCount() - 2);
|
||||
Assert.assertEquals(filtered, filteredExpected, "Filtered");
|
||||
Assert.assertEquals(unfiltered, unfilteredExpected, "Uniltered");
|
||||
}
|
||||
|
|
@ -241,10 +211,10 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
int recordsPerGroup = 3;
|
||||
List<SAMRecord> records = new ArrayList<SAMRecord>();
|
||||
int alignmentStart = 0;
|
||||
for (int x = 1; x <= READ_GROUP_COUNT; x++) {
|
||||
SAMReadGroupRecord groupRecord = header.getReadGroup(READ_GROUP_PREFIX + x);
|
||||
for (int x = 1; x <= getReadGroupCount(); x++) {
|
||||
SAMReadGroupRecord groupRecord = getHeader().getReadGroup(getReadGroupId(x));
|
||||
for (int y = 1; y <= recordsPerGroup; y++) {
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "readUno", 0, ++alignmentStart, 20);
|
||||
SAMRecord record = ArtificialSAMUtils.createArtificialRead(getHeader(), "readUno", 0, ++alignmentStart, 20);
|
||||
record.setAttribute("RG", groupRecord.getReadGroupId());
|
||||
records.add(record);
|
||||
}
|
||||
|
|
@ -270,7 +240,7 @@ public class ReadGroupBlackListFilterUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
int filteredExpected = recordsPerGroup * 2;
|
||||
int unfilteredExpected = recordsPerGroup * (READ_GROUP_COUNT - 2);
|
||||
int unfilteredExpected = recordsPerGroup * (getReadGroupCount() - 2);
|
||||
Assert.assertEquals(filtered, filteredExpected, "Filtered");
|
||||
Assert.assertEquals(unfiltered, unfilteredExpected, "Uniltered");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
|
||||
/**
|
||||
* Tests for the {@link MalformedReadFilter} when the unsafe flag
|
||||
* {@link ValidationExclusion.TYPE#ALL} is set.
|
||||
*
|
||||
* @author Valentin Ruano-Rubio
|
||||
* @since 6/6/13
|
||||
*/
|
||||
public class UnsafeMalformedReadFilterUnitTest extends AllowNCigarMalformedReadFilterUnitTest {
|
||||
|
||||
|
||||
@Override
|
||||
protected ValidationExclusion composeValidationExclusion() {
|
||||
return new ValidationExclusion(Collections.singletonList(ValidationExclusion.TYPE.ALL));
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue