Refactoring and unit testing GenomeLocParser
-- Moved previously inner class to MRUCachingSAMSequenceDictionary, and unit test to 100% coverage -- Fully document all functions in GenomeLocParser -- Unit tests for things like parsePosition (shocking it wasn't tested!) -- Removed function to specifically create GenomeLocs for VariantContexts. The fact that you must incorporate END attributes in the context means that createGenomeLoc(Feature) works correctly -- Depreciated (and moved functionality) of setStart, setStop, and incPos to GenomeLoc -- Unit test coverage at like 80%, moving to 100% with next commit
This commit is contained in:
parent
8562bfaae1
commit
45603f58cd
|
|
@ -193,7 +193,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
|
||||
private boolean overlapsKnownCNV(VariantContext cnv) {
|
||||
if ( knownCNVs != null ) {
|
||||
final GenomeLoc loc = getWalker().getToolkit().getGenomeLocParser().createGenomeLoc(cnv, true);
|
||||
final GenomeLoc loc = getWalker().getToolkit().getGenomeLocParser().createGenomeLoc(cnv);
|
||||
IntervalTree<GenomeLoc> intervalTree = knownCNVs.get(loc.getContig());
|
||||
|
||||
final Iterator<IntervalTree.Node<GenomeLoc>> nodeIt = intervalTree.overlappers(loc.getStart(), loc.getStop());
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class IntervalStratification extends VariantStratifier {
|
|||
|
||||
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
if (eval != null) {
|
||||
final GenomeLoc loc = getVariantEvalWalker().getToolkit().getGenomeLocParser().createGenomeLoc(eval, true);
|
||||
final GenomeLoc loc = getVariantEvalWalker().getToolkit().getGenomeLocParser().createGenomeLoc(eval);
|
||||
IntervalTree<GenomeLoc> intervalTree = intervalTreeByContig.get(loc.getContig());
|
||||
IntervalTree.Node<GenomeLoc> node = intervalTree.minOverlapper(loc.getStart(), loc.getStop());
|
||||
//logger.info(String.format("Overlap %s found %s", loc, node));
|
||||
|
|
|
|||
|
|
@ -530,4 +530,55 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
final int cmp = this.compareTo(other);
|
||||
return cmp == -1 ? other : this;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc from an existing loc, with a new start position
|
||||
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
||||
* set the start of a new GenomeLoc pertaining to a read that goes off the end of the contig.
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param start a new start position
|
||||
*
|
||||
* @return a newly allocated GenomeLoc as loc but with start == start
|
||||
*/
|
||||
public GenomeLoc setStart(GenomeLoc loc, int start) {
|
||||
return new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop());
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc from an existing loc, with a new stop position
|
||||
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
||||
* set the stop of a new GenomeLoc pertaining to a read that goes off the end of the contig.
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param stop a new stop position
|
||||
*
|
||||
* @return a newly allocated GenomeLoc as loc but with stop == stop
|
||||
*/
|
||||
public GenomeLoc setStop(GenomeLoc loc, int stop) {
|
||||
return new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* return a new genome loc, with an incremented position
|
||||
*
|
||||
* @param loc the old location
|
||||
*
|
||||
* @return a newly allocated GenomeLoc as loc but with start == loc.getStart() + 1
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc) {
|
||||
return incPos(loc, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* return a new genome loc, with an incremented position
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param by how much to move the start and stop by
|
||||
*
|
||||
* @return a newly allocated GenomeLoc as loc but with start == loc.getStart() + by
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc, int by) {
|
||||
return new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,10 +34,8 @@ import net.sf.samtools.SAMSequenceDictionary;
|
|||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
/**
|
||||
* Factory class for creating GenomeLocs
|
||||
|
|
@ -49,7 +47,9 @@ public final class GenomeLocParser {
|
|||
* How much validation should we do at runtime with this parser?
|
||||
*/
|
||||
public enum ValidationLevel {
|
||||
/** Do the standard amount of validation */
|
||||
STANDARD,
|
||||
/** Don't do any real checking at all */
|
||||
NONE
|
||||
}
|
||||
|
||||
|
|
@ -68,11 +68,11 @@ public final class GenomeLocParser {
|
|||
/**
|
||||
* A thread-local CachingSequenceDictionary
|
||||
*/
|
||||
private final ThreadLocal<CachingSequenceDictionary> contigInfoPerThread =
|
||||
new ThreadLocal<CachingSequenceDictionary>() {
|
||||
private final ThreadLocal<MRUCachingSAMSequenceDictionary> contigInfoPerThread =
|
||||
new ThreadLocal<MRUCachingSAMSequenceDictionary>() {
|
||||
@Override
|
||||
protected CachingSequenceDictionary initialValue() {
|
||||
return new CachingSequenceDictionary(SINGLE_MASTER_SEQUENCE_DICTIONARY);
|
||||
protected MRUCachingSAMSequenceDictionary initialValue() {
|
||||
return new MRUCachingSAMSequenceDictionary(SINGLE_MASTER_SEQUENCE_DICTIONARY);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -84,107 +84,10 @@ public final class GenomeLocParser {
|
|||
/**
|
||||
* @return a caching sequence dictionary appropriate for this thread
|
||||
*/
|
||||
private CachingSequenceDictionary getContigInfo() {
|
||||
private MRUCachingSAMSequenceDictionary getContigInfo() {
|
||||
return contigInfoPerThread.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* A wrapper class that provides efficient last used caching for the global
|
||||
* SAMSequenceDictionary underlying all of the GATK engine capabilities.
|
||||
*/
|
||||
private final class CachingSequenceDictionary {
|
||||
final private SAMSequenceDictionary dict;
|
||||
|
||||
// cache
|
||||
SAMSequenceRecord lastSSR = null;
|
||||
String lastContig = "";
|
||||
int lastIndex = -1;
|
||||
|
||||
@Requires({"dict != null", "dict.size() > 0"})
|
||||
public CachingSequenceDictionary(SAMSequenceDictionary dict) {
|
||||
this.dict = dict;
|
||||
}
|
||||
|
||||
public final String internContigName(final String contig) {
|
||||
return getContigInfo(contig).getSequenceName();
|
||||
}
|
||||
|
||||
@Ensures("result > 0")
|
||||
public final int getNSequences() {
|
||||
return dict.size();
|
||||
}
|
||||
|
||||
@Requires("contig != null")
|
||||
public final boolean hasContig(final String contig) {
|
||||
return contig.equals(lastContig) || dict.getSequence(contig) != null;
|
||||
}
|
||||
|
||||
@Requires("index >= 0")
|
||||
public final boolean hasContig(final int index) {
|
||||
return lastIndex == index || dict.getSequence(index) != null;
|
||||
}
|
||||
|
||||
@Requires("contig != null")
|
||||
@Ensures("result != null")
|
||||
public final SAMSequenceRecord getSequence(final String contig) {
|
||||
if ( isCached(contig) )
|
||||
return lastSSR;
|
||||
else
|
||||
return updateCache(contig, -1);
|
||||
}
|
||||
|
||||
@Requires("index >= 0")
|
||||
@Ensures("result != null")
|
||||
public final SAMSequenceRecord getSequence(final int index) {
|
||||
if ( isCached(index) )
|
||||
return lastSSR;
|
||||
else
|
||||
return updateCache(null, index);
|
||||
}
|
||||
|
||||
@Requires("contig != null")
|
||||
@Ensures("result >= 0")
|
||||
public final int getSequenceIndex(final String contig) {
|
||||
if ( ! isCached(contig) ) {
|
||||
updateCache(contig, -1);
|
||||
}
|
||||
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
@Requires({"contig != null", "lastContig != null"})
|
||||
private boolean isCached(final String contig) {
|
||||
return lastContig.equals(contig);
|
||||
}
|
||||
|
||||
@Requires({"lastIndex != -1", "index >= 0"})
|
||||
private boolean isCached(final int index) {
|
||||
return lastIndex == index;
|
||||
}
|
||||
|
||||
/**
|
||||
* The key algorithm. Given a new record, update the last used record, contig
|
||||
* name, and index.
|
||||
*
|
||||
* @param contig
|
||||
* @param index
|
||||
* @return
|
||||
*/
|
||||
@Requires("contig != null || index >= 0")
|
||||
@Ensures("result != null")
|
||||
private SAMSequenceRecord updateCache(final String contig, int index ) {
|
||||
SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig);
|
||||
if ( rec == null ) {
|
||||
throw new ReviewedStingException("BUG: requested unknown contig=" + contig + " index=" + index);
|
||||
} else {
|
||||
lastSSR = rec;
|
||||
lastContig = rec.getSequenceName();
|
||||
lastIndex = rec.getSequenceIndex();
|
||||
return rec;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* set our internal reference contig order
|
||||
* @param refFile the reference file
|
||||
|
|
@ -205,16 +108,18 @@ public final class GenomeLocParser {
|
|||
/**
|
||||
* Create a genome loc parser based on seqDict with the specified level of validation
|
||||
* @param seqDict the sequence dictionary to use when creating genome locs
|
||||
* @param validationLevel how much validation should we do of the genome locs at runtime?
|
||||
* @param validationLevel how much validation should we do of the genome locs at runtime? Purely for testing purposes
|
||||
*/
|
||||
public GenomeLocParser(SAMSequenceDictionary seqDict, final ValidationLevel validationLevel) {
|
||||
this.validationLevel = validationLevel;
|
||||
protected GenomeLocParser(SAMSequenceDictionary seqDict, final ValidationLevel validationLevel) {
|
||||
if (validationLevel == null)
|
||||
throw new IllegalArgumentException("validation level cannot be null");
|
||||
if (seqDict == null) { // we couldn't load the reference dictionary
|
||||
//logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
|
||||
throw new UserException.CommandLineException("Failed to load reference dictionary");
|
||||
}
|
||||
|
||||
SINGLE_MASTER_SEQUENCE_DICTIONARY = seqDict;
|
||||
this.validationLevel = validationLevel;
|
||||
this.SINGLE_MASTER_SEQUENCE_DICTIONARY = seqDict;
|
||||
if ( logger.isDebugEnabled() ) {
|
||||
logger.debug(String.format("Prepared reference sequence contig dictionary"));
|
||||
for (SAMSequenceRecord contig : seqDict.getSequences()) {
|
||||
|
|
@ -227,17 +132,13 @@ public final class GenomeLocParser {
|
|||
* Determines whether the given contig is valid with respect to the sequence dictionary
|
||||
* already installed in the GenomeLoc.
|
||||
*
|
||||
* @param contig a potentially null string name for the contig
|
||||
* @return True if the contig is valid. False otherwise.
|
||||
*/
|
||||
public final boolean contigIsInDictionary(String contig) {
|
||||
public final boolean contigIsInDictionary(final String contig) {
|
||||
return contig != null && getContigInfo().hasContig(contig);
|
||||
}
|
||||
|
||||
public final boolean indexIsInDictionary(final int index) {
|
||||
return index >= 0 && getContigInfo().hasContig(index);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the contig's SAMSequenceRecord
|
||||
*
|
||||
|
|
@ -278,7 +179,7 @@ public final class GenomeLocParser {
|
|||
* @return
|
||||
*/
|
||||
public final SAMSequenceDictionary getContigs() {
|
||||
return getContigInfo().dict;
|
||||
return getContigInfo().getDictionary();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -286,14 +187,13 @@ public final class GenomeLocParser {
|
|||
// Low-level creation functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* create a genome loc, given the contig name, start, and stop
|
||||
* @see #createGenomeLoc(String, int, int, int, boolean) for exact details of the creation.
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param start the starting position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return a new genome loc
|
||||
* Note that because this function doesn't take the contig index as an argument for contig, it
|
||||
* has a slight performance penalty over the version that does take the contig index. Does not
|
||||
* require the created genome loc on the reference genome
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop)"})
|
||||
|
|
@ -301,34 +201,61 @@ public final class GenomeLocParser {
|
|||
return createGenomeLoc(contig, getContigIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
public GenomeLoc createGenomeLoc(String contig, final int start, final int stop, boolean mustBeOnReference) {
|
||||
/**
|
||||
* @see #createGenomeLoc(String, int, int, int, boolean) for exact details of the creation.
|
||||
*
|
||||
* Note that because this function doesn't take the contig index as an argument for contig, it
|
||||
* has a slight performance penalty over the version that does take the contig index.
|
||||
*/
|
||||
public GenomeLoc createGenomeLoc(final String contig, final int start, final int stop, boolean mustBeOnReference) {
|
||||
return createGenomeLoc(contig, getContigIndex(contig), start, stop, mustBeOnReference);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #createGenomeLoc(String, int, int, int, boolean) for exact details of the creation.
|
||||
*
|
||||
* Doesn't require the start and stop to be on the genome
|
||||
*/
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop, false)"})
|
||||
public GenomeLoc createGenomeLoc(String contig, int index, final int start, final int stop) {
|
||||
return createGenomeLoc(contig, index, start, stop, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a GenomeLoc on contig, starting at start and ending (inclusive) at stop.
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param index the index into the GATK's SAMSequencingDictionary of contig (passed for efficiency to avoid the lookup)
|
||||
* @param start the starting position
|
||||
* @param stop the stop position of this loc, inclusive
|
||||
* @param mustBeOnReference if true, this factory will throw a UserException.MalformedGenomeLoc if start or stop isn't on the contig
|
||||
*
|
||||
* @return a non-null GenomeLoc
|
||||
*/
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop,mustBeOnReference)"})
|
||||
public GenomeLoc createGenomeLoc(String contig, int index, final int start, final int stop, boolean mustBeOnReference) {
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc createGenomeLoc(final String contig, int index, final int start, final int stop, boolean mustBeOnReference) {
|
||||
// optimization: by interning the string we ensure that future comparisons use == not the full string comp
|
||||
final String interned = validateGenomeLoc(contig, index, start, stop, mustBeOnReference);
|
||||
return new GenomeLoc(interned, index, start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new genome loc, bounding start and stop by the start and end of contig
|
||||
* @param contig our contig
|
||||
* @param start our start as an arbitrary integer (may be negative, etc)
|
||||
* @param stop our stop as an arbitrary integer (may be negative, etc)
|
||||
* @throws ReviewedStingException if there's no way to create a meaningful genome loc given contig, start, and stop
|
||||
* @return a valid genome loc over contig
|
||||
* Create a new GenomeLoc, on contig, including the single position pos.
|
||||
*
|
||||
* Pos is not required to be on the reference
|
||||
*
|
||||
* @see #createGenomeLoc(String, int, int, int, boolean) for exact details of the creation.
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param pos the start and stop of the created genome loc
|
||||
*
|
||||
* @return a genome loc representing a single base at the specified postion on the contig
|
||||
*/
|
||||
public GenomeLoc createGenomeLocOnContig(final String contig, final int start, final int stop) {
|
||||
final GenomeLoc myLoc = createGenomeLoc(contig, start, stop);
|
||||
final GenomeLoc contigLoc = createOverEntireContig(contig);
|
||||
return myLoc.intersect(contigLoc);
|
||||
@Ensures("result != null")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, pos, pos, true)"})
|
||||
public GenomeLoc createGenomeLoc(final String contig, final int pos) {
|
||||
return createGenomeLoc(contig, getContigIndex(contig), pos, pos);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -472,7 +399,7 @@ public final class GenomeLocParser {
|
|||
*/
|
||||
@Requires("pos != null")
|
||||
@Ensures("result >= 0")
|
||||
private int parsePosition(final String pos) {
|
||||
protected int parsePosition(final String pos) {
|
||||
if(pos.indexOf('-') != -1) {
|
||||
throw new NumberFormatException("Position: '" + pos + "' can't contain '-'." );
|
||||
}
|
||||
|
|
@ -533,89 +460,34 @@ public final class GenomeLocParser {
|
|||
}
|
||||
|
||||
/**
|
||||
* Creates a GenomeLoc corresponding to the variant context vc. If includeSymbolicEndIfPossible
|
||||
* is true, and VC is a symbolic allele the end of the created genome loc will be the value
|
||||
* of the END info field key, if it exists, or vc.getEnd() if not.
|
||||
*
|
||||
* @param vc
|
||||
* @param includeSymbolicEndIfPossible
|
||||
* @return
|
||||
* @see GenomeLoc.setStart
|
||||
*/
|
||||
public GenomeLoc createGenomeLoc(final VariantContext vc, boolean includeSymbolicEndIfPossible) {
|
||||
if ( includeSymbolicEndIfPossible && vc.isSymbolic() ) {
|
||||
int end = vc.getAttributeAsInt(VCFConstants.END_KEY, vc.getEnd());
|
||||
return createGenomeLoc(vc.getChr(), vc.getStart(), end);
|
||||
}
|
||||
else
|
||||
return createGenomeLoc(vc.getChr(), vc.getStart(), vc.getEnd());
|
||||
}
|
||||
|
||||
public GenomeLoc createGenomeLoc(final VariantContext vc) {
|
||||
return createGenomeLoc(vc, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc, given the contig name, and a single position. Must be on the reference
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param pos the postion
|
||||
*
|
||||
* @return a genome loc representing a single base at the specified postion on the contig
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, pos, pos, true)"})
|
||||
public GenomeLoc createGenomeLoc(final String contig, final int pos) {
|
||||
return createGenomeLoc(contig, getContigIndex(contig), pos, pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc from an existing loc, with a new start position
|
||||
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
||||
* set the start of a new GenomeLoc pertaining to a read that goes off the end of the contig.
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param start a new start position
|
||||
*
|
||||
* @return the newly created genome loc
|
||||
*/
|
||||
public GenomeLoc setStart(GenomeLoc loc, int start) {
|
||||
@Deprecated
|
||||
public GenomeLoc setStart(final GenomeLoc loc, final int start) {
|
||||
return createGenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop());
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc from an existing loc, with a new stop position
|
||||
* Note that this function will NOT explicitly check the ending offset, in case someone wants to
|
||||
* set the stop of a new GenomeLoc pertaining to a read that goes off the end of the contig.
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param stop a new stop position
|
||||
*
|
||||
* @return
|
||||
* @see GenomeLoc.setStop
|
||||
*/
|
||||
public GenomeLoc setStop(GenomeLoc loc, int stop) {
|
||||
@Deprecated
|
||||
public GenomeLoc setStop(final GenomeLoc loc, final int stop) {
|
||||
return createGenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* return a new genome loc, with an incremented position
|
||||
*
|
||||
* @param loc the old location
|
||||
*
|
||||
* @return a new genome loc
|
||||
* @see GenomeLoc.incPos
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc) {
|
||||
@Deprecated
|
||||
public GenomeLoc incPos(final GenomeLoc loc) {
|
||||
return incPos(loc, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* return a new genome loc, with an incremented position
|
||||
*
|
||||
* @param loc the old location
|
||||
* @param by how much to move the start and stop by
|
||||
*
|
||||
* @return a new genome loc
|
||||
* @see GenomeLoc.incPos
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc, int by) {
|
||||
@Deprecated
|
||||
public GenomeLoc incPos(final GenomeLoc loc, final int by) {
|
||||
return createGenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by);
|
||||
}
|
||||
|
||||
|
|
@ -626,7 +498,7 @@ public final class GenomeLocParser {
|
|||
*/
|
||||
@Requires("contigName != null")
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc createOverEntireContig(String contigName) {
|
||||
public GenomeLoc createOverEntireContig(final String contigName) {
|
||||
SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
|
||||
return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true);
|
||||
}
|
||||
|
|
@ -638,12 +510,12 @@ public final class GenomeLocParser {
|
|||
* @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the start of the contig.
|
||||
*/
|
||||
@Requires({"loc != null", "maxBasePairs > 0"})
|
||||
public GenomeLoc createGenomeLocAtStart(GenomeLoc loc, int maxBasePairs) {
|
||||
public GenomeLoc createGenomeLocAtStart(final GenomeLoc loc, final int maxBasePairs) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
return null;
|
||||
String contigName = loc.getContig();
|
||||
SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
|
||||
int contigIndex = contig.getSequenceIndex();
|
||||
final String contigName = loc.getContig();
|
||||
final SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
|
||||
final int contigIndex = contig.getSequenceIndex();
|
||||
|
||||
int start = loc.getStart() - maxBasePairs;
|
||||
int stop = loc.getStart() - 1;
|
||||
|
|
@ -662,19 +534,12 @@ public final class GenomeLocParser {
|
|||
* @param padding The number of base pairs to pad on either end
|
||||
* @return The contiguous loc of length up to the original length + 2*padding (depending on the start/end of the contig).
|
||||
*/
|
||||
@Requires({"loc != null", "padding > 0"})
|
||||
@Requires({"loc != null", "padding >= 0"})
|
||||
public GenomeLoc createPaddedGenomeLoc(final GenomeLoc loc, final int padding) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
if (GenomeLoc.isUnmapped(loc) || padding == 0)
|
||||
return loc;
|
||||
final String contigName = loc.getContig();
|
||||
final SAMSequenceRecord contig = getContigInfo().getSequence(contigName);
|
||||
final int contigIndex = contig.getSequenceIndex();
|
||||
final int contigLength = contig.getSequenceLength();
|
||||
|
||||
final int start = Math.max(1, loc.getStart() - padding);
|
||||
final int stop = Math.min(contigLength, loc.getStop() + padding);
|
||||
|
||||
return createGenomeLoc(contigName, contigIndex, start, stop, true);
|
||||
else
|
||||
return createGenomeLocOnContig(loc.getContig(), loc.getContigIndex(), loc.getStart() - padding, loc.getStop() + padding);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -684,7 +549,7 @@ public final class GenomeLocParser {
|
|||
* @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the end of the contig.
|
||||
*/
|
||||
@Requires({"loc != null", "maxBasePairs > 0"})
|
||||
public GenomeLoc createGenomeLocAtStop(GenomeLoc loc, int maxBasePairs) {
|
||||
public GenomeLoc createGenomeLocAtStop(final GenomeLoc loc, final int maxBasePairs) {
|
||||
if (GenomeLoc.isUnmapped(loc))
|
||||
return null;
|
||||
String contigName = loc.getContig();
|
||||
|
|
@ -702,4 +567,35 @@ public final class GenomeLocParser {
|
|||
|
||||
return createGenomeLoc(contigName, contigIndex, start, stop, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #createGenomeLocOnContig(String, int, int, int) with the contig index looked up from contig
|
||||
*/
|
||||
public GenomeLoc createGenomeLocOnContig(final String contig, final int start, final int stop) {
|
||||
return createGenomeLocOnContig(contig, getContigIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new genome loc, bounding start and stop by the start and end of contig
|
||||
*
|
||||
* This function will return null if start and stop cannot be adjusted in any reasonable way
|
||||
* to be on the contig. For example, if start and stop are both past the end of the contig,
|
||||
* there's no way to fix this, and null will be returned.
|
||||
*
|
||||
* @param contig our contig
|
||||
* @param start our start as an arbitrary integer (may be negative, etc)
|
||||
* @param stop our stop as an arbitrary integer (may be negative, etc)
|
||||
* @return a valid genome loc over contig, or null if a meaningful genome loc cannot be created
|
||||
*/
|
||||
public GenomeLoc createGenomeLocOnContig(final String contig, final int contigIndex, final int start, final int stop) {
|
||||
final int contigLength = getContigInfo().getSequence(contigIndex).getSequenceLength();
|
||||
final int boundedStart = Math.max(1, start);
|
||||
final int boundedStop = Math.min(contigLength, stop);
|
||||
|
||||
if ( boundedStart > contigLength || boundedStop < 1 )
|
||||
// there's no meaningful way to create this genome loc, as the start and stop are off the contig
|
||||
return null;
|
||||
else
|
||||
return createGenomeLoc(contig, contigIndex, boundedStart, boundedStop);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,186 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
/**
|
||||
* A wrapper class that provides efficient most recently used caching for the global
|
||||
* SAMSequenceDictionary underlying all of the GATK engine capabilities. It is essential
|
||||
* that these class be as efficient as possible. It doesn't need to be thread-safe, as
|
||||
* GenomeLocParser uses a thread-local variable to ensure that each thread gets its own MRU
|
||||
* cache.
|
||||
*
|
||||
* The MRU elements are the SAMSequenceRecord, the lastContig, and the lastIndex. The
|
||||
* cached value is the actual SAMSequenceRecord of the most recently accessed value from
|
||||
* getSequence, along with local variables for the contig index and contig string.
|
||||
*/
|
||||
final class MRUCachingSAMSequenceDictionary {
|
||||
/**
|
||||
* Our sequence dictionary
|
||||
*/
|
||||
private final SAMSequenceDictionary dict;
|
||||
|
||||
SAMSequenceRecord lastSSR = null;
|
||||
String lastContig = "";
|
||||
int lastIndex = -1;
|
||||
|
||||
/**
|
||||
* Create a new MRUCachingSAMSequenceDictionary that provides information about sequences in dict
|
||||
* @param dict a non-null, non-empty sequencing dictionary
|
||||
*/
|
||||
@Ensures("lastSSR == null")
|
||||
public MRUCachingSAMSequenceDictionary(final SAMSequenceDictionary dict) {
|
||||
if ( dict == null ) throw new IllegalArgumentException("Dictionary cannot be null");
|
||||
if ( dict.size() == 0 ) throw new IllegalArgumentException("Dictionary cannot have size zero");
|
||||
|
||||
this.dict = dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get our sequence dictionary
|
||||
* @return a non-null SAMSequenceDictionary
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public SAMSequenceDictionary getDictionary() {
|
||||
return dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is contig present in the dictionary? Efficiently caching.
|
||||
* @param contig a non-null contig we want to test
|
||||
* @return true if contig is in dictionary, false otherwise
|
||||
*/
|
||||
@Requires("contig != null")
|
||||
public final boolean hasContig(final String contig) {
|
||||
return contig.equals(lastContig) || dict.getSequence(contig) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is contig index present in the dictionary? Efficiently caching.
|
||||
* @param contigIndex an integer offset that might map to a contig in this dictionary
|
||||
* @return true if contigIndex is in dictionary, false otherwise
|
||||
*/
|
||||
@Requires("contigIndex >= 0")
|
||||
public final boolean hasContigIndex(final int contigIndex) {
|
||||
return lastIndex == contigIndex || dict.getSequence(contigIndex) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as SAMSequenceDictionary.getSequence but uses a MRU cache for efficiency
|
||||
*
|
||||
* @param contig the contig name we want to get the sequence record of
|
||||
* @throws ReviewedStingException if contig isn't present in the dictionary
|
||||
* @return the sequence record for contig
|
||||
*/
|
||||
@Requires("contig != null")
|
||||
@Ensures("result != null")
|
||||
public final SAMSequenceRecord getSequence(final String contig) {
|
||||
if ( isCached(contig) )
|
||||
return lastSSR;
|
||||
else
|
||||
return updateCache(contig, -1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as SAMSequenceDictionary.getSequence but uses a MRU cache for efficiency
|
||||
*
|
||||
* @param index the contig index we want to get the sequence record of
|
||||
* @throws ReviewedStingException if contig isn't present in the dictionary
|
||||
* @return the sequence record for contig
|
||||
*/
|
||||
@Requires("index >= 0")
|
||||
@Ensures("result != null")
|
||||
public final SAMSequenceRecord getSequence(final int index) {
|
||||
if ( isCached(index) )
|
||||
return lastSSR;
|
||||
else
|
||||
return updateCache(null, index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as SAMSequenceDictionary.getSequenceIndex but uses a MRU cache for efficiency
|
||||
*
|
||||
* @param contig the contig we want to get the sequence record of
|
||||
* @throws ReviewedStingException if index isn't present in the dictionary
|
||||
* @return the sequence record index for contig
|
||||
*/
|
||||
@Requires("contig != null")
|
||||
@Ensures("result >= 0")
|
||||
public final int getSequenceIndex(final String contig) {
|
||||
if ( ! isCached(contig) ) {
|
||||
updateCache(contig, -1);
|
||||
}
|
||||
|
||||
return lastIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is contig the MRU cached contig?
|
||||
* @param contig the contig to test
|
||||
* @return true if contig is the currently cached contig, false otherwise
|
||||
*/
|
||||
@Requires({"contig != null"})
|
||||
protected boolean isCached(final String contig) {
|
||||
return contig.equals(lastContig);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the contig index index the MRU cached index?
|
||||
* @param index the contig index to test
|
||||
* @return true if contig index is the currently cached contig index, false otherwise
|
||||
*/
|
||||
protected boolean isCached(final int index) {
|
||||
return lastIndex == index;
|
||||
}
|
||||
|
||||
/**
|
||||
* The key algorithm. Given a new record, update the last used record, contig
|
||||
* name, and index.
|
||||
*
|
||||
* @param contig the contig we want to look up. If null, index is used instead
|
||||
* @param index the contig index we want to look up. Only used if contig is null
|
||||
* @throws ReviewedStingException if index isn't present in the dictionary
|
||||
* @return the SAMSequenceRecord for contig / index
|
||||
*/
|
||||
@Requires("contig != null || index >= 0")
|
||||
@Ensures("result != null")
|
||||
private SAMSequenceRecord updateCache(final String contig, int index ) {
|
||||
SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig);
|
||||
if ( rec == null ) {
|
||||
throw new ReviewedStingException("BUG: requested unknown contig=" + contig + " index=" + index);
|
||||
} else {
|
||||
lastSSR = rec;
|
||||
lastContig = rec.getSequenceName();
|
||||
lastIndex = rec.getSequenceIndex();
|
||||
return rec;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -29,17 +29,31 @@ package org.broadinstitute.sting.utils;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broad.tribble.BasicFeature;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
|
|
@ -49,10 +63,11 @@ import org.testng.annotations.Test;
|
|||
*/
|
||||
public class GenomeLocParserUnitTest extends BaseTest {
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private SAMFileHeader header;
|
||||
|
||||
@BeforeClass
|
||||
public void init() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
|
|
@ -231,7 +246,16 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
|
||||
assertTrue( genomeLocParser.isValidGenomeLoc("chr1",-1,2, false)); // bad stop
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
|
||||
assertTrue( genomeLocParser.isValidGenomeLoc("chr1",10,11, false)); // bad start, past end
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",2,1)); // stop < start
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testValidateGenomeLoc() {
|
||||
// bad contig index
|
||||
genomeLocParser.validateGenomeLoc("chr1", 1, 1, 2, false);
|
||||
}
|
||||
|
||||
private static class FlankingGenomeLocTestData extends TestDataProvider {
|
||||
|
|
@ -333,4 +357,153 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
data.toString(), data.original, actual, data.flankStop);
|
||||
assertEquals(actual, data.flankStop, description);
|
||||
}
|
||||
|
||||
@DataProvider(name = "parseGenomeLoc")
|
||||
public Object[][] makeParsingTest() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
tests.add(new Object[]{ "chr1:10", "chr1", 10 });
|
||||
tests.add(new Object[]{ "chr1:100", "chr1", 100 });
|
||||
tests.add(new Object[]{ "chr1:1000", "chr1", 1000 });
|
||||
tests.add(new Object[]{ "chr1:1,000", "chr1", 1000 });
|
||||
tests.add(new Object[]{ "chr1:10000", "chr1", 10000 });
|
||||
tests.add(new Object[]{ "chr1:10,000", "chr1", 10000 });
|
||||
tests.add(new Object[]{ "chr1:100000", "chr1", 100000 });
|
||||
tests.add(new Object[]{ "chr1:100,000", "chr1", 100000 });
|
||||
tests.add(new Object[]{ "chr1:1000000", "chr1", 1000000 });
|
||||
tests.add(new Object[]{ "chr1:1,000,000", "chr1", 1000000 });
|
||||
tests.add(new Object[]{ "chr1:1000,000", "chr1", 1000000 });
|
||||
tests.add(new Object[]{ "chr1:1,000000", "chr1", 1000000 });
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test( dataProvider = "parseGenomeLoc")
|
||||
public void testParsingPositions(final String string, final String contig, final int start) {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10000000);
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
final GenomeLoc loc = genomeLocParser.parseGenomeLoc(string);
|
||||
Assert.assertEquals(loc.getContig(), contig);
|
||||
Assert.assertEquals(loc.getStart(), start);
|
||||
Assert.assertEquals(loc.getStop(), start);
|
||||
}
|
||||
|
||||
@Test( )
|
||||
public void testCreationFromSAMRecord() {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
||||
Assert.assertEquals(loc.getContig(), read.getReferenceName());
|
||||
Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex());
|
||||
Assert.assertEquals(loc.getStart(), read.getAlignmentStart());
|
||||
Assert.assertEquals(loc.getStop(), read.getAlignmentEnd());
|
||||
}
|
||||
|
||||
@Test( )
|
||||
public void testCreationFromSAMRecordUnmapped() {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
|
||||
read.setReadUnmappedFlag(true);
|
||||
read.setReferenceIndex(-1);
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
||||
Assert.assertTrue(loc.isUnmapped());
|
||||
}
|
||||
|
||||
@Test( )
|
||||
public void testCreationFromSAMRecordUnmappedButOnGenome() {
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 5);
|
||||
read.setReadUnmappedFlag(true);
|
||||
read.setCigarString("*");
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read);
|
||||
Assert.assertEquals(loc.getContig(), read.getReferenceName());
|
||||
Assert.assertEquals(loc.getContigIndex(), (int)read.getReferenceIndex());
|
||||
Assert.assertEquals(loc.getStart(), read.getAlignmentStart());
|
||||
Assert.assertEquals(loc.getStop(), read.getAlignmentStart());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreationFromFeature() {
|
||||
final Feature feature = new BasicFeature("chr1", 1, 5);
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(feature);
|
||||
Assert.assertEquals(loc.getContig(), feature.getChr());
|
||||
Assert.assertEquals(loc.getStart(), feature.getStart());
|
||||
Assert.assertEquals(loc.getStop(), feature.getEnd());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreationFromVariantContext() {
|
||||
final VariantContext feature = new VariantContextBuilder("x", "chr1", 1, 5, Arrays.asList(Allele.create("AAAAA", true))).make();
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(feature);
|
||||
Assert.assertEquals(loc.getContig(), feature.getChr());
|
||||
Assert.assertEquals(loc.getStart(), feature.getStart());
|
||||
Assert.assertEquals(loc.getStop(), feature.getEnd());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testcreateGenomeLocOnContig() throws FileNotFoundException {
|
||||
final CachingIndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
final SAMSequenceDictionary dict = seq.getSequenceDictionary();
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(dict);
|
||||
|
||||
for ( final SAMSequenceRecord rec : dict.getSequences() ) {
|
||||
final GenomeLoc loc = genomeLocParser.createOverEntireContig(rec.getSequenceName());
|
||||
Assert.assertEquals(loc.getContig(), rec.getSequenceName());
|
||||
Assert.assertEquals(loc.getStart(), 1);
|
||||
Assert.assertEquals(loc.getStop(), rec.getSequenceLength());
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "GenomeLocOnContig")
|
||||
public Object[][] makeGenomeLocOnContig() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final int contigLength = header.getSequence(0).getSequenceLength();
|
||||
for ( int start = -10; start < contigLength + 10; start++ ) {
|
||||
for ( final int len : Arrays.asList(1, 10, 20) ) {
|
||||
tests.add(new Object[]{ "chr1", start, start + len });
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test( dataProvider = "GenomeLocOnContig")
|
||||
public void testGenomeLocOnContig(final String contig, final int start, final int stop) {
|
||||
final int contigLength = header.getSequence(0).getSequenceLength();
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLocOnContig(contig, start, stop);
|
||||
|
||||
if ( stop < 1 || start > contigLength )
|
||||
Assert.assertNull(loc, "GenomeLoc should be null if the start/stops are not meaningful");
|
||||
else {
|
||||
Assert.assertNotNull(loc);
|
||||
Assert.assertEquals(loc.getContig(), contig);
|
||||
Assert.assertEquals(loc.getStart(), Math.max(start, 1));
|
||||
Assert.assertEquals(loc.getStop(), Math.min(stop, contigLength));
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "GenomeLocPadding")
|
||||
public Object[][] makeGenomeLocPadding() {
|
||||
final List<Object[]> tests = new LinkedList<Object[]>();
|
||||
|
||||
final int contigLength = header.getSequence(0).getSequenceLength();
|
||||
for ( int pad = 0; pad < contigLength + 1; pad++) {
|
||||
for ( int start = 1; start < contigLength; start++ ) {
|
||||
for ( int stop = start; stop < contigLength; stop++ ) {
|
||||
tests.add(new Object[]{ genomeLocParser.createGenomeLoc("chr1", start, stop), pad});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test( dataProvider = "GenomeLocPadding")
|
||||
public void testGenomeLocPadding(final GenomeLoc input, final int pad) {
|
||||
final int contigLength = header.getSequence(0).getSequenceLength();
|
||||
final GenomeLoc padded = genomeLocParser.createPaddedGenomeLoc(input, pad);
|
||||
|
||||
Assert.assertNotNull(padded);
|
||||
Assert.assertEquals(padded.getContig(), input.getContig());
|
||||
Assert.assertEquals(padded.getStart(), Math.max(input.getStart() - pad, 1));
|
||||
Assert.assertEquals(padded.getStop(), Math.min(input.getStop() + pad, contigLength));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
public class MRUCachingSAMSequencingDictionaryUnitTest extends BaseTest {
|
||||
private static ReferenceSequenceFile seq;
|
||||
private static SAMSequenceDictionary dict;
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
// sequence
|
||||
seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
dict = seq.getSequenceDictionary();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasic() {
|
||||
final MRUCachingSAMSequenceDictionary caching = new MRUCachingSAMSequenceDictionary(dict);
|
||||
|
||||
Assert.assertEquals(caching.getDictionary(), dict, "Dictionary not the one I expected");
|
||||
|
||||
for ( final SAMSequenceRecord rec : dict.getSequences() ) {
|
||||
Assert.assertFalse(caching.isCached(rec.getSequenceIndex()), "Expected index to not be cached");
|
||||
Assert.assertFalse(caching.isCached(rec.getSequenceName()), "Expected contig to not be cached");
|
||||
|
||||
Assert.assertEquals(caching.getSequence(rec.getSequenceName()), rec, "Couldn't query for sequence");
|
||||
Assert.assertEquals(caching.getSequence(rec.getSequenceIndex()), rec, "Couldn't query for sequence index");
|
||||
Assert.assertEquals(caching.hasContig(rec.getSequenceName()), true, "hasContig query for sequence");
|
||||
Assert.assertEquals(caching.hasContigIndex(rec.getSequenceIndex()), true, "hasContigIndex query for sequence");
|
||||
Assert.assertEquals(caching.getSequenceIndex(rec.getSequenceName()), rec.getSequenceIndex(), "Couldn't query for sequence");
|
||||
|
||||
Assert.assertEquals(caching.hasContig(rec.getSequenceName() + "asdfadsfa"), false, "hasContig query for unknown sequence");
|
||||
Assert.assertEquals(caching.hasContigIndex(dict.getSequences().size()), false, "hasContigIndex query for unknown index");
|
||||
|
||||
Assert.assertTrue(caching.isCached(rec.getSequenceIndex()), "Expected index to be cached");
|
||||
Assert.assertTrue(caching.isCached(rec.getSequenceName()), "Expected contig to be cached");
|
||||
}
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testBadGetSequence() {
|
||||
final MRUCachingSAMSequenceDictionary caching = new MRUCachingSAMSequenceDictionary(dict);
|
||||
caching.getSequence("notInDictionary");
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = ReviewedStingException.class)
|
||||
public void testBadGetSequenceIndex() {
|
||||
final MRUCachingSAMSequenceDictionary caching = new MRUCachingSAMSequenceDictionary(dict);
|
||||
caching.getSequence(dict.getSequences().size());
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue