Filter reads whose alignment starts past the end of the contig to which it allegedly aligns.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1188 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
194b75613b
commit
4ba2194b5e
|
|
@ -58,11 +58,6 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
/** Backing support for reads. */
|
/** Backing support for reads. */
|
||||||
private final Reads reads;
|
private final Reads reads;
|
||||||
|
|
||||||
/**
|
|
||||||
* A histogram of exactly what reads were removed from the input stream and why.
|
|
||||||
*/
|
|
||||||
private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();
|
|
||||||
|
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
|
protected static Logger logger = Logger.getLogger(SAMDataSource.class);
|
||||||
|
|
||||||
|
|
@ -87,7 +82,7 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
* @return Histogram of reads. Will not be null.
|
* @return Histogram of reads. Will not be null.
|
||||||
*/
|
*/
|
||||||
public SAMReadViolationHistogram getViolationHistogram() {
|
public SAMReadViolationHistogram getViolationHistogram() {
|
||||||
return violations;
|
return iteratorPool.getViolationHistogram();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -364,8 +359,6 @@ public class SAMDataSource implements SimpleDataSource {
|
||||||
Double downsamplingFraction,
|
Double downsamplingFraction,
|
||||||
Boolean filterZeroMappingQualityReads,
|
Boolean filterZeroMappingQualityReads,
|
||||||
Boolean beSafeP) {
|
Boolean beSafeP) {
|
||||||
wrappedIterator = new MalformedSAMFilteringIterator(wrappedIterator,violations);
|
|
||||||
|
|
||||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||||
// as there is no reason to sort something that we will end up throwing away
|
// as there is no reason to sort something that we will end up throwing away
|
||||||
if (downsamplingFraction != null)
|
if (downsamplingFraction != null)
|
||||||
|
|
@ -398,6 +391,11 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
||||||
/** Source information about the reads. */
|
/** Source information about the reads. */
|
||||||
protected Reads reads;
|
protected Reads reads;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A histogram of exactly what reads were removed from the input stream and why.
|
||||||
|
*/
|
||||||
|
private SAMReadViolationHistogram violations = new SAMReadViolationHistogram();
|
||||||
|
|
||||||
/** Is this a by-reads traversal or a by-locus? */
|
/** Is this a by-reads traversal or a by-locus? */
|
||||||
protected boolean queryOverlapping;
|
protected boolean queryOverlapping;
|
||||||
|
|
||||||
|
|
@ -422,6 +420,14 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a histogram of reads that were screened out, grouped by the nature of the error.
|
||||||
|
* @return Histogram of reads. Will not be null.
|
||||||
|
*/
|
||||||
|
public SAMReadViolationHistogram getViolationHistogram() {
|
||||||
|
return violations;
|
||||||
|
}
|
||||||
|
|
||||||
protected ReadStreamPointer selectBestExistingResource( DataStreamSegment segment, List<ReadStreamPointer> pointers ) {
|
protected ReadStreamPointer selectBestExistingResource( DataStreamSegment segment, List<ReadStreamPointer> pointers ) {
|
||||||
for (ReadStreamPointer pointer : pointers) {
|
for (ReadStreamPointer pointer : pointers) {
|
||||||
if (pointer.canAccessSegmentEfficiently(segment)) {
|
if (pointer.canAccessSegmentEfficiently(segment)) {
|
||||||
|
|
@ -446,7 +452,7 @@ class SAMIteratorPool extends ResourcePool<ReadStreamPointer, StingSAMIterator>
|
||||||
iterator = streamPointer.getReadsOverlapping((MappedStreamSegment) segment);
|
iterator = streamPointer.getReadsOverlapping((MappedStreamSegment) segment);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new ReleasingIterator(iterator);
|
return new ReleasingIterator(new MalformedSAMFilteringIterator(header, iterator, violations));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void closeResource( ReadStreamPointer resource ) {
|
protected void closeResource( ReadStreamPointer resource ) {
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
package org.broadinstitute.sting.gatk.iterators;
|
package org.broadinstitute.sting.gatk.iterators;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.gatk.Reads;
|
import org.broadinstitute.sting.gatk.Reads;
|
||||||
import org.broadinstitute.sting.utils.sam.SAMReadValidator;
|
import org.broadinstitute.sting.utils.sam.SAMReadValidator;
|
||||||
import org.broadinstitute.sting.utils.sam.SAMReadValidationException;
|
import org.broadinstitute.sting.utils.sam.SAMReadValidationException;
|
||||||
|
|
@ -41,6 +42,11 @@ import java.util.NoSuchElementException;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class MalformedSAMFilteringIterator implements StingSAMIterator {
|
public class MalformedSAMFilteringIterator implements StingSAMIterator {
|
||||||
|
/**
|
||||||
|
* The header to validate reads against.
|
||||||
|
*/
|
||||||
|
private SAMFileHeader header = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The wrapped iterator. Get reads from here.
|
* The wrapped iterator. Get reads from here.
|
||||||
*/
|
*/
|
||||||
|
|
@ -61,7 +67,8 @@ public class MalformedSAMFilteringIterator implements StingSAMIterator {
|
||||||
* @param wrapped The wrapped iterator to use as backing data.
|
* @param wrapped The wrapped iterator to use as backing data.
|
||||||
* @param violations A structure to hold a breakdown of validator violations.
|
* @param violations A structure to hold a breakdown of validator violations.
|
||||||
*/
|
*/
|
||||||
public MalformedSAMFilteringIterator( StingSAMIterator wrapped, SAMReadViolationHistogram violations ) {
|
public MalformedSAMFilteringIterator( SAMFileHeader header, StingSAMIterator wrapped, SAMReadViolationHistogram violations ) {
|
||||||
|
this.header = header;
|
||||||
this.wrapped = wrapped;
|
this.wrapped = wrapped;
|
||||||
this.violations = violations;
|
this.violations = violations;
|
||||||
seedNext();
|
seedNext();
|
||||||
|
|
@ -118,7 +125,7 @@ public class MalformedSAMFilteringIterator implements StingSAMIterator {
|
||||||
while( wrapped.hasNext() && next == null ) {
|
while( wrapped.hasNext() && next == null ) {
|
||||||
SAMRecord toTest = wrapped.next();
|
SAMRecord toTest = wrapped.next();
|
||||||
try {
|
try {
|
||||||
SAMReadValidator.validate(toTest);
|
SAMReadValidator.validate(header,toTest);
|
||||||
next = toTest;
|
next = toTest;
|
||||||
}
|
}
|
||||||
catch ( SAMReadValidationException ex ) {
|
catch ( SAMReadValidationException ex ) {
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,8 @@
|
||||||
package org.broadinstitute.sting.utils.sam;
|
package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Validates reads against a specific set of criteria. If it finds a
|
* Validates reads against a specific set of criteria. If it finds a
|
||||||
|
|
@ -41,9 +43,10 @@ public class SAMReadValidator {
|
||||||
* Throw an exception if the read fails.
|
* Throw an exception if the read fails.
|
||||||
* @param read the read to validate. Must not be null.
|
* @param read the read to validate. Must not be null.
|
||||||
*/
|
*/
|
||||||
public static void validate( SAMRecord read ) throws SAMReadValidationException {
|
public static void validate( SAMFileHeader header, SAMRecord read ) throws SAMReadValidationException {
|
||||||
checkInvalidAlignmentStart(read);
|
checkInvalidAlignmentStart(read);
|
||||||
checkInvalidAlignmentEnd(read);
|
checkInvalidAlignmentEnd(read);
|
||||||
|
checkAlignmentDisagreesWithHeader(header,read);
|
||||||
checkCigarDisagreesWithAlignment(read);
|
checkCigarDisagreesWithAlignment(read);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -67,6 +70,13 @@ public class SAMReadValidator {
|
||||||
throw new SAMReadValidationException("Alignment ends prior to its beginning");
|
throw new SAMReadValidationException("Alignment ends prior to its beginning");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void checkAlignmentDisagreesWithHeader( SAMFileHeader header, SAMRecord read ) {
|
||||||
|
SAMSequenceRecord contigHeader = header.getSequence( read.getReferenceIndex() );
|
||||||
|
if( !read.getReadUnmappedFlag() && read.getAlignmentStart() > contigHeader.getSequenceLength() ) {
|
||||||
|
throw new SAMReadValidationException("Read is aligned to a point after the end of the contig");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check for inconsistencies between the cigar string and the
|
* Check for inconsistencies between the cigar string and the
|
||||||
* @param read The read to validate.
|
* @param read The read to validate.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue