Windowed access to the reference.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1383 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
65e9dcf5b7
commit
48713e154c
|
|
@ -25,6 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.contexts;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
/**
|
||||
* The section of the reference that overlaps with the given
|
||||
* read / locus.
|
||||
|
|
@ -34,17 +37,65 @@ package org.broadinstitute.sting.gatk.contexts;
|
|||
*/
|
||||
|
||||
public class ReferenceContext {
|
||||
private char base;
|
||||
/**
|
||||
* The locus.
|
||||
*/
|
||||
private GenomeLoc locus;
|
||||
|
||||
public ReferenceContext( char base ) {
|
||||
this.base = base;
|
||||
/**
|
||||
* The window of reference information around the current locus.
|
||||
*/
|
||||
private GenomeLoc window;
|
||||
|
||||
/**
|
||||
* The bases in the window around the current locus.
|
||||
*/
|
||||
private char[] bases;
|
||||
|
||||
/**
|
||||
* Contructor for a simple, windowless reference context.
|
||||
* @param locus locus of interest.
|
||||
* @param base reference base at that locus.
|
||||
*/
|
||||
public ReferenceContext( GenomeLoc locus, char base ) {
|
||||
this( locus, locus, new char[] { base } );
|
||||
}
|
||||
|
||||
public ReferenceContext( GenomeLoc locus, GenomeLoc window, char[] bases ) {
|
||||
if( !window.containsP(locus) )
|
||||
throw new StingException("Invalid locus or window; window does not contain locus");
|
||||
|
||||
this.locus = locus;
|
||||
this.window = window;
|
||||
this.bases = bases;
|
||||
}
|
||||
|
||||
/**
|
||||
* The locus currently being examined.
|
||||
* @return The current locus.
|
||||
*/
|
||||
public GenomeLoc getLocus() {
|
||||
return locus;
|
||||
}
|
||||
|
||||
public GenomeLoc getWindow() {
|
||||
return window;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the base at the given locus.
|
||||
* @return The base at the given locus from the reference.§
|
||||
* @return The base at the given locus from the reference.
|
||||
*/
|
||||
public char getBase() {
|
||||
return base;
|
||||
return bases[(int)(locus.getStart() - window.getStart())];
|
||||
}
|
||||
|
||||
/**
|
||||
* All the bases in the window currently being examined.
|
||||
* @return All bases available. If the window is of size [0,0], the array will
|
||||
* contain only the base at the given locus.
|
||||
*/
|
||||
public char[] getBases() {
|
||||
return bases;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,14 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.Reference;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -39,6 +43,19 @@ public class LocusReferenceView extends ReferenceView {
|
|||
*/
|
||||
private final GenomeLoc bounds;
|
||||
|
||||
/**
|
||||
* Start of the expanded window for which the reference context should be provided,
|
||||
* relative to the locus in question.
|
||||
*/
|
||||
private final int windowStart;
|
||||
|
||||
|
||||
/**
|
||||
* Start of the expanded window for which the reference context should be provided,
|
||||
* relative to the locus in question.
|
||||
*/
|
||||
private final int windowStop;
|
||||
|
||||
/**
|
||||
* Track the reference sequence and the last point accessed. Used to
|
||||
* track state when traversing over the reference.
|
||||
|
|
@ -46,26 +63,65 @@ public class LocusReferenceView extends ReferenceView {
|
|||
private ReferenceSequence referenceSequence;
|
||||
|
||||
/**
|
||||
* Create a new locus reference view.
|
||||
* @param provider source for locus data.
|
||||
* Create a LocusReferenceView given no other contextual information about
|
||||
* the walkers, etc.
|
||||
* @param provider source for locus data.
|
||||
*/
|
||||
public LocusReferenceView( ShardDataProvider provider ) {
|
||||
super( provider );
|
||||
super(provider);
|
||||
bounds = provider.getShard().getGenomeLoc();
|
||||
this.referenceSequence = reference.getSubsequenceAt( bounds.getContig(),
|
||||
bounds.getStart(),
|
||||
bounds.getStop() );
|
||||
windowStart = windowStop = 0;
|
||||
initializeReferenceSequence(bounds);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the reference base associated with this particular point on the genome.
|
||||
* Create a new locus reference view.
|
||||
* @param provider source for locus data.
|
||||
*/
|
||||
public LocusReferenceView( Walker walker, ShardDataProvider provider ) {
|
||||
super( provider );
|
||||
bounds = provider.getShard().getGenomeLoc();
|
||||
|
||||
// Retrieve information about the window being accessed.
|
||||
if( walker.getClass().isAnnotationPresent(Reference.class) ) {
|
||||
Window window = walker.getClass().getAnnotation(Reference.class).window();
|
||||
|
||||
if( window.start() > 0 ) throw new StingException( "Reference window starts after current locus" );
|
||||
if( window.stop() < 0 ) throw new StingException( "Reference window ends before current locus" );
|
||||
|
||||
windowStart = window.start();
|
||||
windowStop = window.stop();
|
||||
}
|
||||
else {
|
||||
windowStart = 0;
|
||||
windowStop = 0;
|
||||
}
|
||||
|
||||
long expandedStart = getWindowStart( bounds );
|
||||
long expandedStop = getWindowStop( bounds );
|
||||
|
||||
initializeReferenceSequence(GenomeLocParser.createGenomeLoc(bounds.getContig(), expandedStart, expandedStop));
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize reference sequence data using the given locus.
|
||||
* @param locus
|
||||
*/
|
||||
private void initializeReferenceSequence( GenomeLoc locus ) {
|
||||
this.referenceSequence = reference.getSubsequenceAt( locus.getContig(), locus.getStart(), locus.getStop() );
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the reference context associated with this particular point on the genome.
|
||||
* @param genomeLoc Region for which to retrieve the base. GenomeLoc must represent a 1-base region.
|
||||
* @return The base at the position represented by this genomeLoc.
|
||||
*/
|
||||
public char getReferenceBase( GenomeLoc genomeLoc ) {
|
||||
public ReferenceContext getReferenceContext( GenomeLoc genomeLoc ) {
|
||||
validateLocation( genomeLoc );
|
||||
int offset = (int)(genomeLoc.getStart() - bounds.getStart());
|
||||
return StringUtil.bytesToString( referenceSequence.getBases(), offset, 1 ).charAt(0);
|
||||
|
||||
GenomeLoc window = GenomeLocParser.createGenomeLoc( genomeLoc.getContig(), getWindowStart(genomeLoc), getWindowStop(genomeLoc) );
|
||||
char[] bases = StringUtil.bytesToString( referenceSequence.getBases(), (int)(window.getStart() - getWindowStart(bounds)), (int)window.size() ).toCharArray();
|
||||
return new ReferenceContext( genomeLoc, window, bases );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -91,4 +147,22 @@ public class LocusReferenceView extends ReferenceView {
|
|||
throw new InvalidPositionException(
|
||||
String.format("Requested position %s not within interval %s", genomeLoc, bounds));
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the start of the expanded window, bounded if necessary by the contig.
|
||||
* @param locus The locus to expand.
|
||||
* @return The expanded window.
|
||||
*/
|
||||
private long getWindowStart( GenomeLoc locus ) {
|
||||
return Math.max( locus.getStart() + windowStart, 1 );
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the stop of the expanded window, bounded if necessary by the contig.
|
||||
* @param locus The locus to expand.
|
||||
* @return The expanded window.
|
||||
*/
|
||||
private long getWindowStop( GenomeLoc locus ) {
|
||||
return Math.min( locus.getStop() + windowStop, reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength() );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class TraverseLoci extends TraversalEngine {
|
|||
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
||||
|
||||
LocusView locusView = getLocusView( walker, dataProvider );
|
||||
LocusReferenceView referenceView = new LocusReferenceView( dataProvider );
|
||||
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
|
|
@ -57,7 +57,7 @@ public class TraverseLoci extends TraversalEngine {
|
|||
// Iterate forward to get all reference ordered data covering this locus
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||
|
||||
ReferenceContext refContext = new ReferenceContext( referenceView.getReferenceBase(locus.getLocation()) );
|
||||
ReferenceContext refContext = referenceView.getReferenceContext(locus.getLocation());
|
||||
|
||||
final boolean keepMeP = locusWalker.filter(tracker, refContext, locus);
|
||||
if (keepMeP) {
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class TraverseLocusWindows extends TraversalEngine {
|
|||
GenomeLoc interval = shard.getGenomeLoc();
|
||||
|
||||
ReadView readView = new ReadView( dataProvider );
|
||||
LocusReferenceView referenceView = new LocusReferenceView( dataProvider );
|
||||
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
|
||||
|
||||
AlignmentContext locus = getLocusContext(readView.iterator(), interval);
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ import net.sf.samtools.SAMRecord;
|
|||
*/
|
||||
public class PrintLocusContextWalker extends LocusWalker<AlignmentContext, Integer> implements TreeReducible<Integer> {
|
||||
public AlignmentContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
out.printf( "In map: ref = %s, loc = %s, reads = %s%n", ref,
|
||||
out.printf( "In map: ref = %s, loc = %s, reads = %s%n", ref.getBase(),
|
||||
context.getLocation(),
|
||||
Arrays.deepToString( getReadNames(context.getReads()) ) );
|
||||
return context;
|
||||
|
|
|
|||
|
|
@ -22,7 +22,6 @@ import java.lang.annotation.*;
|
|||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface RMD {
|
||||
String name();
|
||||
Class<? extends ReferenceOrderedDatum> type();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,47 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* Describes presentation, capabilities, and limitations of the reference
|
||||
* provided to the GATK.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target(ElementType.TYPE)
|
||||
public @interface Reference {
|
||||
/**
|
||||
* Specifies the window expansion for the current walker.
|
||||
* @return The window to which the reference should be expanded. Defaults to [0,0] (no expansion).
|
||||
*/
|
||||
public Window window() default @Window;
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* Describes the size of the window into the genome. Has differing semantics based on
|
||||
* the data this annotation is used to describe.
|
||||
*
|
||||
* @author mhanna
|
||||
* @version 0.1
|
||||
*/
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
public @interface Window {
|
||||
/**
|
||||
* Controls where the window should start and stop relative to
|
||||
* the locus currently being processed.
|
||||
* @return start point; default is 0, indicating 'supply only the reference base at the current locus'.
|
||||
*/
|
||||
public int start() default 0;
|
||||
|
||||
/**
|
||||
* Controls where the window should start and stop relative to
|
||||
* the locus currently being processed.
|
||||
* @return stop point; default is 0, indicating 'supply only the reference base at the current locus'.
|
||||
*/
|
||||
public int stop() default 0;
|
||||
}
|
||||
|
|
@ -271,6 +271,13 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* How many BPs are covered by this locus?
|
||||
* @return Number of BPs covered by this locus. According to the semantics of GenomeLoc, this should
|
||||
* never be < 1.
|
||||
*/
|
||||
public long size() {
|
||||
return stop - start + 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
view.getReferenceBase(shard.getGenomeLoc());
|
||||
view.getReferenceContext(shard.getGenomeLoc()).getBase();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
@ -74,7 +74,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
ShardDataProvider dataProvider = new ShardDataProvider(shard, null, sequenceFile, null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
view.getReferenceBase(GenomeLocParser.createGenomeLoc(0, 51));
|
||||
view.getReferenceContext(GenomeLocParser.createGenomeLoc(0, 51)).getBase();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -95,7 +95,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
|
||||
ReferenceSequence expectedAsSeq = sequenceFile.getSubsequenceAt(locus.getContig(), locus.getStart(), locus.getStop());
|
||||
char expected = StringUtil.bytesToString(expectedAsSeq.getBases()).charAt(0);
|
||||
char actual = view.getReferenceBase(locus);
|
||||
char actual = view.getReferenceContext(locus).getBase();
|
||||
|
||||
Assert.assertEquals(String.format("Value of base at position %s in shard %s does not match expected", locus.toString(), shard.getGenomeLoc()),
|
||||
expected,
|
||||
|
|
|
|||
Loading…
Reference in New Issue