Modifications:

@CoverageAndPowerWalker - removed a hanging colon that was being printed after the reference position

@VariantEvalWalker - added a command line argument for pool size for eventual use in doing pooled caller evaluations. As of now, the variable is unused.

@AlignmentContext - altered the scope of class variables from private to protected in order that child objects might have access to them


New Additions:

Filtered Contexts

Sometimes we want to filter or partition reads by some aspect (quality score, read direction, current base, whatever) and use only those reads as
part of the alignment context. Prior to this I've been doing the split externally and creating a new AlignmentContext object. This new approach makes
it a bit easier, as each of these objects is a child of AlignmentContext and can be instantiated from a "raw" AlignmentContext.

@FilteredAlignmentContext is an abstract class that defines the behavior. The abstract method 'filter' is called on the input AlignmentContext, filtering
those reads and offsets by whatever you can think of. The filtered reads/offsets are then maintained in the reads and offsets fields. These classes can
be passed around as AlignmentContexts themselves. Writing a new kind of read-filtered alignment context boils down to implementing the filter method.

@ReverseReadsContext - a FilteredAlignmentContext that takes only reads in the reverse direction

@ForwardReadsContext - a FilteredAlignmentContext that takes only reads in the forward direction

@QualityScoreThresholdContext - a FilteredAlignmentContext that takes only reads above a given quality score threshold (defaults to 22 if none provided).

A unit test bamfile and associated unit tests for these are in the works.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1559 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2009-09-09 15:49:52 +00:00
parent d9588e6083
commit 9d69bd2c84
7 changed files with 175 additions and 4 deletions

View File

@ -41,9 +41,16 @@ import java.util.*;
* To change this template use File | Settings | File Templates.
*/
public class AlignmentContext {
private GenomeLoc loc = null;
private List<SAMRecord> reads = null;
private List<Integer> offsets = null;
protected GenomeLoc loc = null;
protected List<SAMRecord> reads = null;
protected List<Integer> offsets = null;
/**
* Default constructor for AlignmentContext object
* since private objects are already set to null we
* don't need to do anything
*/
public AlignmentContext() { /* private objects already set to null */ }
/**
* Create a new AlignmentContext object

View File

@ -0,0 +1,41 @@
package org.broadinstitute.sting.playground.contexts;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.Pair;
import java.util.List;
import net.sf.samtools.SAMRecord;
/**
 * An {@link AlignmentContext} whose reads and offsets are a filtered subset of
 * those found in another alignment context.
 *
 * <p>Subclasses decide which reads survive by implementing
 * {@link #filter(AlignmentContext)}; the pair it returns is stored in the
 * inherited {@code reads} and {@code offsets} fields, and the location is
 * copied from the source context.
 *
 * <p>User: chartl, Date: Sep 9, 2009, Time: 10:43:23 AM
 */
public abstract class FilteredAlignmentContext extends AlignmentContext {

    /**
     * Creates a context without filtering anything; only the parent's no-arg
     * constructor runs.
     */
    public FilteredAlignmentContext() {
        super();
    }

    /**
     * Builds a filtered view of {@code context}.
     *
     * <p>Note that {@link #filter(AlignmentContext)} is invoked from inside this
     * constructor, i.e. before any subclass field initializers or subclass
     * constructor bodies have executed. Implementations that depend on their
     * own instance state must not rely on that state being set when reached
     * through this constructor.
     *
     * @param context the alignment context whose reads are to be filtered
     */
    public FilteredAlignmentContext(AlignmentContext context) {
        final Pair<List<SAMRecord>, List<Integer>> kept = filter(context);
        reads = kept.getFirst();
        offsets = kept.getSecond();
        loc = context.getLocation();
    }

    /**
     * Selects the reads (and their corresponding offsets) of {@code context}
     * that belong in this filtered context.
     *
     * @param context an alignment context containing the reads to be filtered
     * @return a pair holding the retained reads first and their offsets second
     */
    public abstract Pair<List<SAMRecord>, List<Integer>> filter(AlignmentContext context);
}

View File

@ -0,0 +1,34 @@
package org.broadinstitute.sting.playground.contexts;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.ArrayList;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Sep 9, 2009
* Time: 11:01:53 AM
* To change this template use File | Settings | File Templates.
*/
public class ForwardReadsContext extends FilteredAlignmentContext {
public Pair<List<SAMRecord>,List<Integer>> filter(AlignmentContext context) {
List<SAMRecord> inReads = context.getReads();
List<Integer> inOffsets = context.getOffsets();
List<SAMRecord> filteredReads = new ArrayList<SAMRecord>();
List<Integer> filteredOffsets = new ArrayList<Integer>();
for( int i = 0; i < inReads.size(); i++ ) {
if( ! inReads.get(i).getReadNegativeStrandFlag() ) {
filteredReads.add(inReads.get(i));
filteredOffsets.add(inOffsets.get(i));
}
}
return new Pair<List<SAMRecord>,List<Integer>>(filteredReads,filteredOffsets);
}
}

View File

@ -0,0 +1,51 @@
package org.broadinstitute.sting.playground.contexts;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.Pair;
import java.util.List;
import java.util.ArrayList;
import net.sf.samtools.SAMRecord;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Sep 9, 2009
* Time: 11:17:30 AM
* To change this template use File | Settings | File Templates.
*/
public class QualityScoreThresholdContext extends FilteredAlignmentContext{
/*
* @Param: qThresh - default value for thresholding
*/
protected byte qThresh = 22;
public QualityScoreThresholdContext(AlignmentContext context, byte qThresh) {
this.qThresh = qThresh;
Pair<List<SAMRecord>, List<Integer>> filteredRO = filter(context);
this.reads = filteredRO.getFirst();
this.offsets = filteredRO.getSecond();
this.loc = context.getLocation();
}
public byte getQualityScoreThreshold() {
return this.qThresh;
}
public Pair<List<SAMRecord>,List<Integer>> filter(AlignmentContext context) {
List<SAMRecord> inReads = context.getReads();
List<Integer> inOffsets = context.getOffsets();
List<SAMRecord> outReads = new ArrayList<SAMRecord>();
List<Integer> outOffsets = new ArrayList<Integer>();
for( int i = 0; i < inReads.size(); i++) {
if(inReads.get(i).getBaseQualities()[inOffsets.get(i)] >= this.qThresh) {
outReads.add(inReads.get(i));
outOffsets.add(inOffsets.get(i));
}
}
return new Pair<List<SAMRecord>,List<Integer>>(outReads,outOffsets);
}
}

View File

@ -0,0 +1,35 @@
package org.broadinstitute.sting.playground.contexts;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.ArrayList;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: Sep 9, 2009
* Time: 11:09:32 AM
* To change this template use File | Settings | File Templates.
*/
public class ReverseReadsContext extends FilteredAlignmentContext {
public Pair<List<SAMRecord>,List<Integer>> filter(AlignmentContext context) {
List<SAMRecord> inReads = context.getReads();
List<Integer> inOffsets = context.getOffsets();
List<SAMRecord> filteredReads = new ArrayList<SAMRecord>();
List<Integer> filteredOffsets = new ArrayList<Integer>();
for( int i = 0; i < inReads.size(); i++ ) {
if( inReads.get(i).getReadNegativeStrandFlag() ) {
filteredReads.add(inReads.get(i));
filteredOffsets.add(inOffsets.get(i));
}
}
return new Pair<List<SAMRecord>,List<Integer>>(filteredReads,filteredOffsets);
}
}

View File

@ -71,7 +71,7 @@ public class CoverageAndPowerWalker extends LocusWalker<Pair<Integer, Integer>,
Pair<Pair<List<SAMRecord>,List<SAMRecord>>,Pair<List<Integer>,List<Integer>>> readsByDirection = PoolUtils.splitReadsByReadDirection(filteredContext.getReads(),filteredContext.getOffsets());
if ( ! suppress_printing) {
Pair<double[],byte[]> powers = calculatePower(readsByDirection, useBootstrap, filteredContext);
out.printf("%s: %d %d %d %d %d %d %f %f %f%n", filteredContext.getLocation(), readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
out.printf("%s %d %d %d %d %d %d %f %f %f%n", filteredContext.getLocation(), readsByDirection.getFirst().getFirst().size(), readsByDirection.getFirst().getSecond().size(),
filteredContext.getReads().size(), powers.getSecond()[0], powers.getSecond()[1], powers.getSecond()[2],
powers.getFirst()[0], powers.getFirst()[1], powers.getFirst()[2]);
}

View File

@ -55,6 +55,9 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
@Argument(fullName="supressDateInformation", doc="This flag indicates that we want to suppress the date information from the output, so that if can be diff'ed against previous evals.", required=false)
public boolean supressDateInformation = false;
@Argument(fullName = "numPeopleInPool", shortName="S", doc="If using a variant file from a pooled caller, this field provides the number of individuals in each pool", required=false)
public int numPeopleInPool = 1;
String analysisFilenameBase = null;
final String knownSNPDBName = "dbSNP";