ByReference traversals!
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@281 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e3ac0cb500
commit
f031d882c6
|
|
@ -16,10 +16,7 @@ import org.broadinstitute.sting.gatk.refdata.HapMapAlleleFrequenciesROD;
|
|||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraverseByReads;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraverseByLoci;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraverseByLociByReference;
|
||||
import org.broadinstitute.sting.gatk.traversals.*;
|
||||
import org.broadinstitute.sting.utils.FastaSequenceFile2;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
|
@ -35,7 +32,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
public static GenomeAnalysisTK Instance = null;
|
||||
|
||||
// parameters and their defaults
|
||||
public File INPUT_FILE;
|
||||
public File INPUT_FILE = null;
|
||||
public String MAX_READS_ARG = "-1";
|
||||
public String STRICTNESS_ARG = "strict";
|
||||
public File REF_FILE_ARG = null;
|
||||
|
|
@ -98,7 +95,8 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
* Flags don't take an argument, the associated Boolean gets set to true if the flag appears on the command line.
|
||||
*/
|
||||
protected void setupArgs() {
|
||||
m_parser.addRequiredArg("input_file", "I", "SAM or BAM file", "INPUT_FILE");
|
||||
m_parser.addOptionalArg("input_file", "I", "SAM or BAM file", "INPUT_FILE");
|
||||
//m_parser.addRequiredArg("input_file", "I", "SAM or BAM file", "INPUT_FILE");
|
||||
m_parser.addOptionalArg("maximum_reads", "M", "Maximum number of reads to process before exiting", "MAX_READS_ARG");
|
||||
m_parser.addOptionalArg("validation_strictness", "S", "How strict should we be with validation (lenient|silent|strict)", "STRICTNESS_ARG");
|
||||
m_parser.addOptionalArg("reference_sequence", "R", "Reference sequence file", "REF_FILE_ARG");
|
||||
|
|
@ -196,10 +194,21 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
// Try to get the walker specified
|
||||
try {
|
||||
LocusWalker<?, ?> walker = (LocusWalker<?, ?>) my_walker;
|
||||
if ( WALK_ALL_LOCI )
|
||||
this.engine = new TraverseByLociByReference(INPUT_FILE, REF_FILE_ARG, rods);
|
||||
else
|
||||
this.engine = new TraverseByLoci(INPUT_FILE, REF_FILE_ARG, rods);
|
||||
|
||||
if ( INPUT_FILE == null ) {
|
||||
if ( walker.requiresReads() )
|
||||
Utils.scareUser(String.format("Analysis %s requires reads, but none were given", Analysis_Name));
|
||||
this.engine = new TraverseByReference(null, REF_FILE_ARG, rods);
|
||||
} else {
|
||||
if ( walker.cannotHandleReads() )
|
||||
Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", Analysis_Name, INPUT_FILE));
|
||||
|
||||
|
||||
if ( WALK_ALL_LOCI )
|
||||
this.engine = new TraverseByLociByReference(INPUT_FILE, REF_FILE_ARG, rods);
|
||||
else
|
||||
this.engine = new TraverseByLoci(INPUT_FILE, REF_FILE_ARG, rods);
|
||||
}
|
||||
}
|
||||
catch (java.lang.ClassCastException e) {
|
||||
// I guess we're a read walker LOL
|
||||
|
|
|
|||
|
|
@ -38,7 +38,8 @@ public class TraverseByLoci extends TraversalEngine {
|
|||
public <M,T> T traverse(Walker<M,T> walker, ArrayList<GenomeLoc> locations) {
|
||||
if ( walker instanceof LocusWalker ) {
|
||||
LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;
|
||||
return (T)this.traverseByLoci(locusWalker, locations);
|
||||
T sum = traverseByLoci(locusWalker, locations);
|
||||
return sum;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Walker isn't a loci walker!");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,110 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.io.File;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
/**
|
||||
* A simple, short-term solution to iterating over all reference positions over a series of
|
||||
* genomic locations. Simply overloads the superclass traverse function to go over the entire
|
||||
* interval's reference positions.
|
||||
*/
|
||||
public class TraverseByReference extends TraverseByLoci {
|
||||
|
||||
public TraverseByReference(File reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
super(reads, ref, rods);
|
||||
|
||||
logger.debug("Creating TraverseByReference");
|
||||
|
||||
if ( reads != null )
|
||||
throw new IllegalArgumentException("By reference traversal doesn't support reads, but file was given " + reads);
|
||||
if ( ref == null )
|
||||
throw new IllegalArgumentException("By reference traversal requires reference file but none was given");
|
||||
}
|
||||
|
||||
/**
|
||||
* Traverse by loci -- the key driver of linearly ordered traversal of loci. Provides reads, RODs, and
|
||||
* the reference base for each locus in the reference to the LocusWalker walker. Supports all of the
|
||||
* interaction contract implied by the locus walker
|
||||
*
|
||||
* @param walker A locus walker object
|
||||
* @param <M> MapType -- the result of calling map() on walker
|
||||
* @param <T> ReduceType -- the result of calling reduce() on the walker
|
||||
* @return 0 on success
|
||||
*/
|
||||
protected <M, T> T traverseByLoci(LocusWalker<M, T> walker, ArrayList<GenomeLoc> locations) {
|
||||
logger.debug("Entering traverseByReference");
|
||||
|
||||
// initialize the walker object
|
||||
walker.initialize();
|
||||
|
||||
T sum = walker.reduceInit();
|
||||
if ( ! locations.isEmpty() ) {
|
||||
logger.debug("Doing interval-based traversal");
|
||||
|
||||
// we are doing interval-based traversals
|
||||
for ( GenomeLoc interval : locations ) {
|
||||
logger.debug(String.format("Processing locus %s", interval.toString()));
|
||||
sum = carryWalkerOverReference(walker, sum, interval);
|
||||
}
|
||||
}
|
||||
else {
|
||||
// We aren't locus oriented
|
||||
logger.debug("Doing non-interval-based traversal");
|
||||
sum = carryWalkerOverReference(walker, sum, null);
|
||||
}
|
||||
|
||||
printOnTraversalDone("reference", sum);
|
||||
walker.onTraversalDone(sum);
|
||||
return sum;
|
||||
}
|
||||
|
||||
protected <M, T> T carryWalkerOverReference( LocusWalker<M, T> walker,
|
||||
T sum,
|
||||
GenomeLoc interval ) {
|
||||
logger.debug(String.format("traverseByReference.carryWalkerOverReference Genomic interval is %s", interval));
|
||||
|
||||
boolean done = false;
|
||||
|
||||
List<SAMRecord> NO_READS = new ArrayList<SAMRecord>();
|
||||
List<Integer> NO_OFFSETS = new ArrayList<Integer>();
|
||||
|
||||
ReferenceIterator refSite = null;
|
||||
if ( interval != null )
|
||||
refSite = refIter.seekForward(interval); // jump to the first reference site
|
||||
else
|
||||
refSite = refIter.next();
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
while ( (interval == null || interval.containsP(refSite.getLocation())) && ! done ) {
|
||||
this.nRecords++;
|
||||
GenomeLoc current = refSite.getLocation();
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this locus
|
||||
final List<ReferenceOrderedDatum> rodData = getReferenceOrderedDataAtLocus(rodIters, current);
|
||||
|
||||
LocusContext locus = new LocusContext(current, NO_READS, NO_OFFSETS); // make the empty locus that has no reads
|
||||
locus.setReferenceContig(refSite.getCurrentContig());
|
||||
sum = walkAtLocus( walker, sum, locus, refSite, rodData );
|
||||
|
||||
if (this.maxReads > 0 && this.nRecords > this.maxReads) {
|
||||
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + this.nRecords));
|
||||
done = true;
|
||||
}
|
||||
|
||||
//printProgress("ref", locus.getLocation());
|
||||
refSite = refIter.next(); // update our location
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
|
@ -18,6 +18,13 @@ public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, R
|
|||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
/**
|
||||
* These two functions state whether we're don't make any sense without reads (requiresRead())
|
||||
* or whether we can't take any reads at all (cannotHandleRead())
|
||||
*/
|
||||
public boolean requiresReads() { return false; }
|
||||
public boolean cannotHandleReads() { return false; }
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.LocusContext
|
||||
public abstract MapType map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context);
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: mdepristo
|
||||
* Date: Feb 22, 2009
|
||||
* Time: 2:52:28 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class RefWalker<MapType, ReduceType> extends LocusWalker<MapType, ReduceType> {
|
||||
public boolean requiresReads() { return false; }
|
||||
public boolean cannotHandleReads() { return true; }
|
||||
}
|
||||
|
|
@ -29,6 +29,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
Random random;
|
||||
PrintStream output;
|
||||
|
||||
public boolean requiresReads() { return true; }
|
||||
|
||||
public AlleleFrequencyEstimate map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context)
|
||||
{
|
||||
// Convert context data into bases and 4-base quals
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@ public class CoverageBySample extends LocusWalker<String, String>
|
|||
{
|
||||
List<String> sample_names = null;
|
||||
|
||||
public boolean requiresReads() { return true; }
|
||||
|
||||
public void initialize()
|
||||
{
|
||||
GenomeAnalysisTK toolkit = this.getToolkit();
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ public class SingleSampleGenotyper extends LocusWalker<Integer, Integer> {
|
|||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
public boolean requiresReads() { return true; }
|
||||
|
||||
protected class GenotypeLikelihoods
|
||||
{
|
||||
public double[] likelihoods;
|
||||
|
|
|
|||
Loading…
Reference in New Issue