Filter reads whose alignment starts past the end of the contig to which it allegedly aligns.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1188 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
194b75613b
commit
4ba2194b5e
|
|
@ -58,11 +58,6 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
/** Backing support for reads. */
|
||||
private final Reads reads;
|
||||
|
||||
/**
|
||||
* A histogram of exactly what reads were removed from the input stream and why.
|
||||
*/
|
||||
private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();
|
||||
|
||||
/** Our logger, used to capture any messages emitted from this class. */
|
||||
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
|
||||
|
||||
|
|
@ -87,7 +82,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
* @return Histogram of reads. Will not be null.
|
||||
*/
|
||||
public SAMReadViolationHistogram getViolationHistogram() {
|
||||
return violations;
|
||||
return iteratorPool.getViolationHistogram();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -364,8 +359,6 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
Double downsamplingFraction,
|
||||
Boolean filterZeroMappingQualityReads,
|
||||
Boolean beSafeP) {
|
||||
wrappedIterator = new MalformedSAMFilteringIterator(wrappedIterator,violations);
|
||||
|
||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||
// as there is no reason to sort something that we will end up throwing away
|
||||
if (downsamplingFraction != null)
|
||||
|
|
@ -398,6 +391,11 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
|||
/** Source information about the reads. */
|
||||
protected Reads reads;
|
||||
|
||||
/**
|
||||
* A histogram of exactly what reads were removed from the input stream and why.
|
||||
*/
|
||||
private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();
|
||||
|
||||
/** Is this a by-reads traversal or a by-locus? */
|
||||
protected boolean queryOverlapping;
|
||||
|
||||
|
|
@ -422,6 +420,14 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
|||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a histogram of reads that were screened out, grouped by the nature of the error.
|
||||
* @return Histogram of reads. Will not be null.
|
||||
*/
|
||||
public SAMReadViolationHistogram getViolationHistogram() {
|
||||
return violations;
|
||||
}
|
||||
|
||||
protected ReadStreamPointer selectBestExistingResource( DataStreamSegment segment, List<ReadStreamPointer> pointers ) {
|
||||
for (ReadStreamPointer pointer : pointers) {
|
||||
if (pointer.canAccessSegmentEfficiently(segment)) {
|
||||
|
|
@ -446,7 +452,7 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
|||
iterator = streamPointer.getReadsOverlapping((MappedStreamSegment) segment);
|
||||
}
|
||||
|
||||
return new ReleasingIterator(iterator);
|
||||
return new ReleasingIterator(new MalformedSAMFilteringIterator(header, iterator, violations));
|
||||
}
|
||||
|
||||
protected void closeResource( ReadStreamPointer resource ) {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.sam.SAMReadValidator;
|
||||
import org.broadinstitute.sting.utils.sam.SAMReadValidationException;
|
||||
|
|
@ -41,6 +42,11 @@ import java.util.NoSuchElementException;
|
|||
*/
|
||||
|
||||
public class MalformedSAMFilteringIterator implements StingSAMIterator {
|
||||
/**
|
||||
* The header to validate reads against.
|
||||
*/
|
||||
private SAMFileHeader header = null;
|
||||
|
||||
/**
|
||||
* The wrapped iterator. Get reads from here.
|
||||
*/
|
||||
|
|
@ -61,7 +67,8 @@ public class MalformedSAMFilteringIterator implements StingSAMIterator {
|
|||
* @param wrapped The wrapped iterator to use as backing data.
|
||||
* @param violations A structure to hold a breakdown of validator violations.
|
||||
*/
|
||||
public MalformedSAMFilteringIterator( StingSAMIterator wrapped, SAMReadViolationHistogram violations ) {
|
||||
public MalformedSAMFilteringIterator( SAMFileHeader header, StingSAMIterator wrapped, SAMReadViolationHistogram violations ) {
|
||||
this.header = header;
|
||||
this.wrapped = wrapped;
|
||||
this.violations = violations;
|
||||
seedNext();
|
||||
|
|
@ -118,7 +125,7 @@ public class MalformedSAMFilteringIterator implements StingSAMIterator {
|
|||
while( wrapped.hasNext() && next == null ) {
|
||||
SAMRecord toTest = wrapped.next();
|
||||
try {
|
||||
SAMReadValidator.validate(toTest);
|
||||
SAMReadValidator.validate(header,toTest);
|
||||
next = toTest;
|
||||
}
|
||||
catch ( SAMReadValidationException ex ) {
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
||||
/**
|
||||
* Validates reads against a specific set of criteria. If it finds a
|
||||
|
|
@ -41,9 +43,10 @@ public class SAMReadValidator {
|
|||
* Throw an exception if the read fails.
|
||||
* @param read the read to validate. Must not be null.
|
||||
*/
|
||||
public static void validate( SAMRecord read ) throws SAMReadValidationException {
|
||||
public static void validate( SAMFileHeader header, SAMRecord read ) throws SAMReadValidationException {
|
||||
checkInvalidAlignmentStart(read);
|
||||
checkInvalidAlignmentEnd(read);
|
||||
checkAlignmentDisagreesWithHeader(header,read);
|
||||
checkCigarDisagreesWithAlignment(read);
|
||||
}
|
||||
|
||||
|
|
@ -67,6 +70,13 @@ public class SAMReadValidator {
|
|||
throw new SAMReadValidationException("Alignment ends prior to its beginning");
|
||||
}
|
||||
|
||||
private static void checkAlignmentDisagreesWithHeader( SAMFileHeader header, SAMRecord read ) {
|
||||
SAMSequenceRecord contigHeader = header.getSequence( read.getReferenceIndex() );
|
||||
if( !read.getReadUnmappedFlag() && read.getAlignmentStart() > contigHeader.getSequenceLength() ) {
|
||||
throw new SAMReadValidationException("Read is aligned to a point after the end of the contig");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check for inconsistencies between the cigar string and the
|
||||
* @param read The read to validate.
|
||||
|
|
|
|||
Loading…
Reference in New Issue