gatk-3.8/java/src/org/broadinstitute/sting/gatk/contexts/AlignmentContext.java

180 lines
6.7 KiB
Java
Raw Normal View History

/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import java.util.*;
/**
* Container that forwards locus context data (a genomic location together with the pileup of reads over it) on from the iterator that produces it.
*
* Created by IntelliJ IDEA.
* User: mdepristo
* Date: Feb 22, 2009
* Time: 3:01:34 PM
* To change this template use File | Settings | File Templates.
*/
public class AlignmentContext {
Modifications: @CoverageAndPowerWalker - removed a hanging colon that was being printed after the reference position. @VariantEvalWalker - added a command line argument for pool size for eventual use in doing pooled caller evaluations. As of now, the variable is unused. @AlignmentContext - altered the scope of class variables from private to protected so that child objects have access to them. New Additions: Filtered Contexts. Sometimes we want to filter or partition reads by some aspect (quality score, read direction, current base, whatever) and use only those reads as part of the alignment context. Prior to this I've been doing the split externally and creating a new AlignmentContext object. This new approach makes it a bit easier, as each of these objects is a child of AlignmentContext and can be instantiated from a "raw" AlignmentContext. @FilteredAlignmentContext is an abstract class that defines the behavior. The abstract method 'filter' is called on the input AlignmentContext, filtering those reads and offsets by whatever you can think of. The filtered reads/offsets are then maintained in the reads and offsets fields. These classes can be passed around as AlignmentContexts themselves. Writing a new kind of read-filtered alignment context boils down to implementing the filter method. @ReverseReadsContext - a FilteredAlignmentContext that takes only reads in the reverse direction. @ForwardReadsContext - a FilteredAlignmentContext that takes only reads in the forward direction. @QualityScoreThresholdContext - a FilteredAlignmentContext that takes only reads above a given quality score threshold (defaults to 22 if none provided). A unit test bamfile and associated unit tests for these are in the works. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1559 348d0f76-0448-11de-a6fe-93d51630548a
2009-09-09 23:49:52 +08:00
/** Genomic location of this context; the four-argument constructor rejects null. */
protected GenomeLoc loc = null;
/**
 * Pileup held by this context. hasBasePileup() / hasExtendedEventPileup() report whether it is a
 * ReadBackedExtendedEventPileup; the four-argument constructor rejects null.
 */
protected ReadBackedPileup basePileup = null;
/** Whether the pileup has been downsampled; set by the constructor and forced to true by downsampleToCoverage(). */
protected boolean hasPileupBeenDownsampled;
Modifications: @CoverageAndPowerWalker - removed a hanging colon that was being printed after the reference position. @VariantEvalWalker - added a command line argument for pool size for eventual use in doing pooled caller evaluations. As of now, the variable is unused. @AlignmentContext - altered the scope of class variables from private to protected so that child objects have access to them. New Additions: Filtered Contexts. Sometimes we want to filter or partition reads by some aspect (quality score, read direction, current base, whatever) and use only those reads as part of the alignment context. Prior to this I've been doing the split externally and creating a new AlignmentContext object. This new approach makes it a bit easier, as each of these objects is a child of AlignmentContext and can be instantiated from a "raw" AlignmentContext. @FilteredAlignmentContext is an abstract class that defines the behavior. The abstract method 'filter' is called on the input AlignmentContext, filtering those reads and offsets by whatever you can think of. The filtered reads/offsets are then maintained in the reads and offsets fields. These classes can be passed around as AlignmentContexts themselves. Writing a new kind of read-filtered alignment context boils down to implementing the filter method. @ReverseReadsContext - a FilteredAlignmentContext that takes only reads in the reverse direction. @ForwardReadsContext - a FilteredAlignmentContext that takes only reads in the forward direction. @QualityScoreThresholdContext - a FilteredAlignmentContext that takes only reads above a given quality score threshold (defaults to 22 if none provided). A unit test bamfile and associated unit tests for these are in the works. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1559 348d0f76-0448-11de-a6fe-93d51630548a
2009-09-09 23:49:52 +08:00
/**
 * The number of bases we've skipped over in the reference since the last map invocation.
 * Only filled in by RodTraversals right now. By default, nothing is being skipped, so skippedBases == 0.
 * The four-argument constructor rejects negative values.
 */
private long skippedBases = 0;
/**
 * Creates a context with no skipped bases and a pileup that has not been downsampled.
 *
 * @param loc genomic location of this context
 * @param basePileup pileup over {@code loc}
 */
public AlignmentContext(GenomeLoc loc, ReadBackedPileup basePileup) {
    // Delegates to the four-argument constructor, which performs all validation.
    this(loc, basePileup, 0, false);
}
/**
 * Creates a context with no skipped bases and an explicit downsampling flag.
 *
 * @param loc genomic location of this context
 * @param basePileup pileup over {@code loc}
 * @param hasPileupBeenDownsampled whether the pileup has already been downsampled
 */
public AlignmentContext(GenomeLoc loc, ReadBackedPileup basePileup, boolean hasPileupBeenDownsampled) {
    // Delegates to the four-argument constructor, which performs all validation.
    this(loc, basePileup, 0, hasPileupBeenDownsampled);
}
/**
 * Creates a context with an explicit skipped-base count and a pileup that has not been downsampled.
 *
 * @param loc genomic location of this context
 * @param basePileup pileup over {@code loc}
 * @param skippedBases number of reference bases skipped since the last map invocation
 */
public AlignmentContext(GenomeLoc loc, ReadBackedPileup basePileup, long skippedBases) {
    // Delegates to the four-argument constructor, which performs all validation.
    this(loc, basePileup, skippedBases, false);
}
/**
 * Creates an alignment context, validating every argument before storing it.
 *
 * @param loc genomic location of this context; must not be null
 * @param basePileup pileup over {@code loc}; must not be null
 * @param skippedBases number of reference bases skipped since the last map invocation; must be non-negative
 * @param hasPileupBeenDownsampled whether the pileup has already been downsampled
 * @throws ReviewedStingException if {@code loc} or {@code basePileup} is null, or if {@code skippedBases} is negative
 */
public AlignmentContext(GenomeLoc loc, ReadBackedPileup basePileup, long skippedBases, boolean hasPileupBeenDownsampled) {
    if (loc == null) {
        throw new ReviewedStingException("BUG: GenomeLoc in Alignment context is null");
    }
    if (basePileup == null) {
        throw new ReviewedStingException("BUG: ReadBackedPileup in Alignment context is null");
    }
    if (skippedBases < 0) {
        // Any negative count is rejected, so report the actual offending value rather than a fixed "-1".
        throw new ReviewedStingException("BUG: skippedBases is " + skippedBases + " in Alignment context");
    }

    this.loc = loc;
    this.basePileup = basePileup;
    this.skippedBases = skippedBases;
    this.hasPileupBeenDownsampled = hasPileupBeenDownsampled;
}
/**
 * Returns the pileup held by this context without checking what kind of pileup it is.
 *
 * @return the stored pileup
 * @deprecated Use getBasePileup() to make your intentions clear.
 */
@Deprecated
public ReadBackedPileup getPileup() {
    return this.basePileup;
}
/**
 * Returns the base pileup over the current genomic location.
 * Callers should check hasBasePileup() first: when this context holds an extended event (indel)
 * pileup instead, this method throws rather than returning null.
 *
 * @return the base pileup
 * @throws ReviewedStingException if hasBasePileup() is false
 */
public ReadBackedPileup getBasePileup() {
    if (hasBasePileup()) {
        return basePileup;
    }
    throw new ReviewedStingException("No base pileup is available. Please check for a base pileup with hasBasePileup() before attempting to retrieve a pileup.");
}
/**
 * Returns the extended event (indel) pileup over the current genomic location.
 * Callers should check hasExtendedEventPileup() first: when this context holds only a base
 * pileup, this method throws rather than returning null.
 *
 * @return the stored pileup, cast to ReadBackedExtendedEventPileup
 * @throws ReviewedStingException if hasExtendedEventPileup() is false
 */
public ReadBackedExtendedEventPileup getExtendedEventPileup() {
    if (hasExtendedEventPileup()) {
        return (ReadBackedExtendedEventPileup) basePileup;
    }
    throw new ReviewedStingException("No extended event pileup is present.");
}
/**
 * Reports whether this alignment context keeps a base pileup over the current genomic location,
 * i.e. whether the stored pileup is anything other than a ReadBackedExtendedEventPileup.
 * TODO: AlignmentContext uses hasBasePileup() / hasExtendedEventPileup() as an enumeration mechanism. Change this to a more sensible interface.
 *
 * @return true if the stored pileup is not an extended event pileup
 */
public boolean hasBasePileup() {
    final boolean isExtendedEventPileup = basePileup instanceof ReadBackedExtendedEventPileup;
    return !isExtendedEventPileup;
}
/**
 * Reports whether this alignment context keeps an extended event (indel) pileup over the
 * current genomic location.
 *
 * @return true if the stored pileup is a ReadBackedExtendedEventPileup
 */
public boolean hasExtendedEventPileup() {
    final boolean isExtendedEventPileup = basePileup instanceof ReadBackedExtendedEventPileup;
    return isExtendedEventPileup;
}
/**
 * Reports whether any reads have been filtered out of the pileup due to excess DoC.
 *
 * @return True if reads have been filtered out. False otherwise.
 */
public boolean hasPileupBeenDownsampled() {
    return this.hasPileupBeenDownsampled;
}
/**
 * Gets all of the reads within this context, as reported by the stored pileup.
 *
 * @return the reads of the stored pileup
 */
@Deprecated
//todo: unsafe and tailored for current usage only; both pileups can be null or worse, both can be not null in theory
public List<SAMRecord> getReads() {
    final List<SAMRecord> pileupReads = basePileup.getReads();
    return pileupReads;
}
/**
 * Are there any reads associated with this locus?
 *
 * @return true if a pileup is present and its size is greater than zero
 */
public boolean hasReads() {
    // A missing pileup counts as "no reads" rather than an error.
    if (basePileup == null) {
        return false;
    }
    return basePileup.size() > 0;
}
/**
 * How many reads cover this locus? Unlike hasReads(), this does not guard against a null pileup.
 *
 * @return the size reported by the stored pileup
 */
public int size() {
    final int pileupSize = basePileup.size();
    return pileupSize;
}
/**
 * Gets the list of the equivalent positions within the reads at this locus, as reported by
 * the stored pileup.
 *
 * @return the offsets of the stored pileup
 */
@Deprecated
public List<Integer> getOffsets() {
    final List<Integer> pileupOffsets = basePileup.getOffsets();
    return pileupOffsets;
}
/**
 * Returns the contig of this context's location, obtained through getLocation().
 *
 * @return the contig reported by the location
 */
public String getContig() {
    final GenomeLoc location = getLocation();
    return location.getContig();
}
/**
 * Returns the start position of this context's location, obtained through getLocation().
 *
 * @return the start reported by the location
 */
public long getPosition() {
    final GenomeLoc location = getLocation();
    return location.getStart();
}
/**
 * Returns the genomic location of this context.
 *
 * @return the stored location
 */
public GenomeLoc getLocation() {
    return this.loc;
}
/**
 * Replaces the stored pileup with the result of its getDownsampledPileup(coverage) call and
 * marks this context as downsampled. The flag is set whenever that call returns, whatever
 * pileup it produced.
 *
 * @param coverage the coverage value passed to getDownsampledPileup
 */
public void downsampleToCoverage(int coverage) {
    final ReadBackedPileup downsampledPileup = basePileup.getDownsampledPileup(coverage);
    this.basePileup = downsampledPileup;
    this.hasPileupBeenDownsampled = true;
}
/**
 * Reports how many reference bases were skipped over since the last map invocation.
 * Only filled in by RodTraversals right now. A value of 0 indicates that no bases were skipped.
 *
 * @return the number of skipped bases
 */
public long getSkippedBases() {
    return this.skippedBases;
}
}