Updating the core LocusWalker tools to include the Sample infrastructure that I added last month. This commit touches a lot of files, but only significantly changes a few: LocusIteratorByState and ReadBackedPileup and associated classes.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4711 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
bthomas 2010-11-19 19:59:05 +00:00
parent c723db1f4b
commit 374c0deba2
26 changed files with 325 additions and 156 deletions

View File

@ -320,6 +320,8 @@ public abstract class AbstractGenomeAnalysisEngine {
validateSuppliedReads();
readsDataSource = createReadsDataSource(genomeLocParser);
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
for (SamRecordFilter filter : filters)
if (filter instanceof SamRecordHeaderFilter)
((SamRecordHeaderFilter)filter).setHeader(this.getSAMFileHeader());
@ -744,6 +746,10 @@ public abstract class AbstractGenomeAnalysisEngine {
return unpackedReads;
}
/**
 * Returns the sample metadata source backing this engine.
 *
 * @return the SampleDataSource created during engine initialization
 *         (see initialization: built from the SAM file header and any
 *         command-line sample files)
 */
public SampleDataSource getSampleMetadata() {
    return this.sampleDataSource;
}
/**
* Get a sample by its ID
* If an alias is passed in, return the main sample object
@ -802,6 +808,14 @@ public abstract class AbstractGenomeAnalysisEngine {
return sampleDataSource.getChildren(sample);
}
/**
 * Gets all the samples known to this engine's sample metadata source.
 *
 * @return a collection of every Sample registered in the SampleDataSource
 */
public Collection<Sample> getSamples() {
    return sampleDataSource.getSamples();
}
/**
* Takes a list of sample names and returns their corresponding sample objects
*

View File

@ -25,6 +25,8 @@
package org.broadinstitute.sting.gatk.contexts;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -84,7 +86,7 @@ public class StratifiedAlignmentContext<RBP extends ReadBackedPileup> {
* @return a Map of sample name to StratifiedAlignmentContext
*
**/
public static <RBP extends ReadBackedPileup,PE extends PileupElement> Map<String, StratifiedAlignmentContext> splitContextBySample(RBP pileup) {
public static <RBP extends ReadBackedPileup,PE extends PileupElement> Map<Sample, StratifiedAlignmentContext> splitContextBySample(RBP pileup) {
return splitContextBySample(pileup, null);
}
@ -97,20 +99,20 @@ public class StratifiedAlignmentContext<RBP extends ReadBackedPileup> {
* @return a Map of sample name to StratifiedAlignmentContext
*
**/
public static <RBP extends ReadBackedPileup> Map<String, StratifiedAlignmentContext> splitContextBySample(RBP pileup, String assumedSingleSample) {
public static <RBP extends ReadBackedPileup> Map<Sample, StratifiedAlignmentContext> splitContextBySample(RBP pileup, Sample assumedSingleSample) {
GenomeLoc loc = pileup.getLocation();
HashMap<String, StratifiedAlignmentContext> contexts = new HashMap<String, StratifiedAlignmentContext>();
HashMap<Sample, StratifiedAlignmentContext> contexts = new HashMap<Sample, StratifiedAlignmentContext>();
for(String sampleName: pileup.getSamples()) {
RBP pileupBySample = (RBP)pileup.getPileupForSample(sampleName);
for(Sample sample: pileup.getSamples()) {
RBP pileupBySample = (RBP)pileup.getPileupForSample(sample);
// Don't add empty pileups to the split context.
if(pileupBySample.size() == 0)
continue;
if(sampleName != null)
contexts.put(sampleName,new StratifiedAlignmentContext<RBP>(loc,pileupBySample));
if(sample != null)
contexts.put(sample,new StratifiedAlignmentContext<RBP>(loc,pileupBySample));
else {
if(assumedSingleSample == null) {
throw new UserException.ReadMissingReadGroup(pileupBySample.iterator().next().getRead());
@ -122,7 +124,48 @@ public class StratifiedAlignmentContext<RBP extends ReadBackedPileup> {
return contexts;
}
/**
public static <RBP extends ReadBackedPileup,PE extends PileupElement> Map<String, StratifiedAlignmentContext> splitContextBySampleName(RBP pileup) {
return splitContextBySampleName(pileup, null);
}
/**
 * Splits the given pileup into a StratifiedAlignmentContext per sample,
 * keyed by sample name (String) rather than by Sample object.
 *
 * Pileup elements whose sample name is null are attributed to
 * {@code assumedSingleSample}; if that is also null, a
 * UserException.ReadMissingReadGroup is thrown for the offending read.
 *
 * @param pileup              the original pileup
 * @param assumedSingleSample fallback sample name for reads with no sample, or null
 * @return a Map of sample name to StratifiedAlignmentContext
 */
public static <RBP extends ReadBackedPileup> Map<String, StratifiedAlignmentContext> splitContextBySampleName(RBP pileup, String assumedSingleSample) {
    final GenomeLoc location = pileup.getLocation();
    final HashMap<String, StratifiedAlignmentContext> contextsByName = new HashMap<String, StratifiedAlignmentContext>();

    for(String sampleName: pileup.getSampleNames()) {
        RBP samplePileup = (RBP)pileup.getPileupForSampleName(sampleName);

        // Empty pileups contribute nothing to the split context.
        if(samplePileup.size() == 0)
            continue;

        String key = sampleName;
        if(key == null) {
            // No sample attached to these reads; fall back to the assumed
            // single sample, or fail if none was supplied.
            if(assumedSingleSample == null)
                throw new UserException.ReadMissingReadGroup(samplePileup.iterator().next().getRead());
            key = assumedSingleSample;
        }
        contextsByName.put(key, new StratifiedAlignmentContext<RBP>(location, samplePileup));
    }
    return contextsByName;
}
/**
* Splits the given AlignmentContext into a StratifiedAlignmentContext per read group.
*
* @param pileup the original pileup
@ -131,14 +174,14 @@ public class StratifiedAlignmentContext<RBP extends ReadBackedPileup> {
*
**/
/**
 * Splits the given pileup into a StratifiedAlignmentContext per read group.
 *
 * Fix: the rendered block contained the method body twice (statements after
 * the return), which is unreachable/invalid; a single copy is kept.
 *
 * @param pileup the original pileup
 * @return a Map of read group id to StratifiedAlignmentContext
 */
public static <RBP extends ReadBackedPileup> Map<String,StratifiedAlignmentContext<RBP>> splitContextByReadGroup(RBP pileup) {
    HashMap<String,StratifiedAlignmentContext<RBP>> contexts = new HashMap<String,StratifiedAlignmentContext<RBP>>();
    for(String readGroupId: pileup.getReadGroups())
        contexts.put(readGroupId,new StratifiedAlignmentContext<RBP>(pileup.getLocation(),(RBP)pileup.getPileupForReadGroup(readGroupId)));
    return contexts;
}
public static AlignmentContext joinContexts(Collection<StratifiedAlignmentContext> contexts) {
// validation
GenomeLoc loc = contexts.iterator().next().getLocation();
boolean isExtended = contexts.iterator().next().basePileup instanceof ReadBackedExtendedEventPileup;

View File

@ -194,6 +194,6 @@ public class Sample implements java.io.Serializable {
@Override
public int hashCode() {
    // Fix: the rendered block had two consecutive return statements (the
    // second unreachable, the first NPE-prone when id is null). Keep the
    // null-safe form: an id-less sample hashes like the empty string,
    // keeping hashCode consistent for samples created without an id.
    return id != null ? id.hashCode() : "".hashCode();
}
}

View File

@ -37,13 +37,13 @@ public class SampleDataSource {
/**
* SAMFileHeader that has been created for this analysis.
*/
private final SAMFileHeader header;
private SAMFileHeader header;
/**
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
* this is stored as a HashMap.
*/
private HashMap<String, Sample> samples = new HashMap<String, Sample>();
private final HashMap<String, Sample> samples = new HashMap<String, Sample>();
/**
* Samples can have "aliases", because sometimes the same sample is referenced by different IDs in different
@ -65,6 +65,7 @@ public class SampleDataSource {
* @param sampleFiles Sample files that were included on the command line
*/
public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
this();
this.header = header;
// create empty sample object for each sample referenced in the SAM header
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
@ -83,10 +84,14 @@ public class SampleDataSource {
}
}
/**
 * Creates a SampleDataSource pre-populated with a single "null" sample.
 * The null-keyed entry acts as a catch-all bucket for reads that carry
 * no sample information.
 */
public SampleDataSource() {
    // Both the map key and the Sample's id are null by design.
    samples.put(null, new Sample(null));
}
/**
* Hallucinates sample objects for all the samples in the SAM file and stores them
*/
private void getSamplesFromSAMFile() {
public void addSamplesFromSAMHeader(SAMFileHeader header) {
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
if (!hasSample(sampleName)) {
Sample newSample = new Sample(sampleName);
@ -100,7 +105,7 @@ public class SampleDataSource {
* Parse one sample file and integrate it with samples that are already there
* Fail quickly if we find any errors in the file
*/
private void addFile(File sampleFile) {
public void addFile(File sampleFile) {
BufferedReader reader;
try {
@ -466,6 +471,12 @@ public class SampleDataSource {
return children;
}
/**
 * Gets all the samples stored in this data source.
 *
 * @return a new Set holding every registered Sample (a snapshot; mutating
 *         it does not affect this data source)
 */
public Set<Sample> getSamples() {
    return new HashSet<Sample>(samples.values());
}
/**
* Takes a collection of sample names and returns their corresponding sample objects
* Note that, since a set is returned, if you pass in a list with duplicates names there will not be any duplicates in the returned set
@ -572,4 +583,9 @@ public class SampleDataSource {
return context.subContextFromGenotypes(genotypes.values());
}
/**
 * Builds a SampleDataSource backed by a brand-new, empty SAM file header.
 *
 * NOTE(review): passes null for the sample-file list — assumes the
 * (header, files) constructor tolerates a null list; confirm against
 * that constructor.
 *
 * @return a SampleDataSource with no samples beyond the defaults
 */
public static SampleDataSource createEmptyDataSource() {
    return new SampleDataSource(new SAMFileHeader(), null);
}
}

View File

@ -53,7 +53,7 @@ public class LinearMicroScheduler extends MicroScheduler {
// New experimental code for managing locus intervals.
if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards());
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), lWalker.getDiscards(), engine.getSampleMetadata());
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());

View File

@ -61,7 +61,7 @@ public class ShardTraverser implements Callable {
Object accumulator = walker.reduceInit();
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards());
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(),lWalker.getDiscards(), microScheduler.engine.getSampleMetadata()); // todo: microScheduler.engine is protected - is it okay to user it here?
ShardDataProvider dataProvider = null;
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {

View File

@ -1,5 +1,8 @@
package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.ReadProperties;
@ -50,13 +53,16 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
* the given intervals.
* @param iterator The data source for this window.
* @param intervals The set of intervals over which to traverse.
* @param sampleData SampleDataSource that we can reference reads with
* @param discards filters that indicate read position relative to some locus (exact semantics to be confirmed)
*/
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, List<LocusIteratorFilter> discards ) {
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, List<LocusIteratorFilter> discards, SampleDataSource sampleData ) {
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,discards);
LocusIterator locusIterator = new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData,discards);
this.sourceIterator = new PeekableIterator<AlignmentContext>(locusIterator);
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;

View File

@ -33,6 +33,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.*;
@ -65,8 +67,11 @@ public class LocusIteratorByState extends LocusIterator {
* Used to create new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
private final Collection<String> sampleNames = new ArrayList<String>();
private final SampleDataSource sampleData;
private final ArrayList<Sample> samples;
private final ReadStateManager readStates;
static private class SAMRecordState {
@ -273,19 +278,23 @@ public class LocusIteratorByState extends LocusIterator {
// constructors and other basic operations
//
// -----------------------------------------------------------------------------------------------------------------
/**
 * Creates a LocusIteratorByState with the default (empty) set of locus filters.
 *
 * Fix: the rendered block retained both the pre- and post-commit constructor
 * signatures (duplicate definitions) and the removed sampleNames aggregation
 * code; only the post-commit, SampleDataSource-based versions are kept.
 *
 * @param samIterator     source of SAMRecords to iterate over
 * @param readInformation read properties for this traversal
 * @param genomeLocParser used to create new GenomeLocs
 * @param sampleData      sample metadata used to partition reads by sample
 */
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, SampleDataSource sampleData ) {
    this(samIterator, readInformation, genomeLocParser, sampleData, NO_FILTERS);
}

/**
 * Creates a LocusIteratorByState.
 *
 * @param samIterator     source of SAMRecords to iterate over
 * @param readInformation read properties for this traversal
 * @param genomeLocParser used to create new GenomeLocs
 * @param sampleData      sample metadata used to partition reads by sample
 * @param filters         locus filters to apply during iteration
 */
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, SampleDataSource sampleData, List<LocusIteratorFilter> filters ) {
    this.readInfo = readInformation;
    this.genomeLocParser = genomeLocParser;
    this.filters = filters;

    // Set up the sample data: snapshot the full sample list so per-sample
    // iteration order is stable for the lifetime of this iterator.
    this.sampleData = sampleData;
    this.samples = new ArrayList<Sample>();
    this.samples.addAll(sampleData.getSamples());

    readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod(),sampleData);
}
public Iterator<AlignmentContext> iterator() {
@ -303,8 +312,8 @@ public class LocusIteratorByState extends LocusIterator {
}
public void printState() {
for(String sampleName: sampleNames) {
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
for(Sample sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sample);
while(iterator.hasNext()) {
SAMRecordState state = iterator.next();
logger.debug(String.format("printState():"));
@ -354,8 +363,8 @@ public class LocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
Map<String,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup =
new HashMap<String,ReadBackedExtendedEventPileupImpl>();
Map<Sample,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup =
new HashMap<Sample,ReadBackedExtendedEventPileupImpl>();
SAMRecordState our1stState = readStates.getFirst();
// get current location on the reference and decrement it by 1: the indels we just stepped over
@ -363,10 +372,10 @@ public class LocusIteratorByState extends LocusIterator {
GenomeLoc loc = genomeLocParser.incPos(our1stState.getLocation(genomeLocParser),-1);
boolean hasBeenSampled = false;
for(String sampleName: sampleNames) {
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
for(Sample sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sample);
ArrayList<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size());
hasBeenSampled |= loc.getStart() <= readStates.readStatesBySample.get(sampleName).getDownsamplingExtent();
hasBeenSampled |= loc.getStart() <= readStates.readStatesBySample.get(sample).getDownsamplingExtent();
size = 0;
nDeletions = 0;
@ -419,7 +428,7 @@ public class LocusIteratorByState extends LocusIterator {
nMQ0Reads++;
}
}
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sampleName,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads));
if( indelPile.size() != 0 ) fullExtendedEventPileup.put(sample,new ReadBackedExtendedEventPileupImpl(loc,indelPile,size,maxDeletionLength,nInsertions,nDeletions,nMQ0Reads));
}
hasExtendedEvents = false; // we are done with extended events prior to current ref base
// System.out.println("Indel(s) at "+loc);
@ -427,14 +436,14 @@ public class LocusIteratorByState extends LocusIterator {
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
} else {
GenomeLoc location = getLocation();
Map<String,ReadBackedPileupImpl> fullPileup = new HashMap<String,ReadBackedPileupImpl>();
Map<Sample,ReadBackedPileupImpl> fullPileup = new HashMap<Sample,ReadBackedPileupImpl>();
// todo -- performance problem -- should be lazy, really
boolean hasBeenSampled = false;
for(String sampleName: sampleNames) {
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sampleName);
for(Sample sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iteratorForSample(sample);
ArrayList<PileupElement> pile = new ArrayList<PileupElement>(readStates.size());
hasBeenSampled |= location.getStart() <= readStates.readStatesBySample.get(sampleName).getDownsamplingExtent();
hasBeenSampled |= location.getStart() <= readStates.readStatesBySample.get(sample).getDownsamplingExtent();
size = 0;
nDeletions = 0;
@ -464,7 +473,7 @@ public class LocusIteratorByState extends LocusIterator {
nMQ0Reads++;
}
}
if( pile.size() != 0 ) fullPileup.put(sampleName,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads));
if( pile.size() != 0 ) fullPileup.put(sample,new ReadBackedPileupImpl(location,pile,size,nDeletions,nMQ0Reads));
}
updateReadStates(); // critical - must be called after we get the current state offsets and location
@ -507,8 +516,8 @@ public class LocusIteratorByState extends LocusIterator {
// }
private void updateReadStates() {
for(String sampleName: sampleNames) {
Iterator<SAMRecordState> it = readStates.iteratorForSample(sampleName);
for(Sample sample: samples) {
Iterator<SAMRecordState> it = readStates.iteratorForSample(sample);
while ( it.hasNext() ) {
SAMRecordState state = it.next();
CigarOperator op = state.stepForwardOnGenome();
@ -536,13 +545,13 @@ public class LocusIteratorByState extends LocusIterator {
private final SamplePartitioner samplePartitioner;
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
private final Map<Sample,PerSampleReadStateManager> readStatesBySample = new HashMap<Sample,PerSampleReadStateManager>();
private final int targetCoverage;
private int totalReadStates = 0;
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, Collection<String> sampleNames) {
public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod, SampleDataSource sampleData) {
this.iterator = new PeekableIterator<SAMRecord>(source);
this.downsamplingMethod = downsamplingMethod.type != null ? downsamplingMethod : DownsamplingMethod.NONE;
switch(this.downsamplingMethod.type) {
@ -556,17 +565,17 @@ public class LocusIteratorByState extends LocusIterator {
}
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
for(String sampleName: sampleNames) {
readStatesBySample.put(sampleName,new PerSampleReadStateManager());
readSelectors.put(sampleName,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
for(Sample sample: samples) {
readStatesBySample.put(sample,new PerSampleReadStateManager());
readSelectors.put(sample.getId(),downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
}
samplePartitioner = new SamplePartitioner(readSelectors);
}
public Iterator<SAMRecordState> iteratorForSample(final String sampleName) {
public Iterator<SAMRecordState> iteratorForSample(final Sample sample) {
return new Iterator<SAMRecordState>() {
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sampleName).iterator();
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
public boolean hasNext() {
return wrappedIterator.hasNext();
@ -592,8 +601,8 @@ public class LocusIteratorByState extends LocusIterator {
}
public SAMRecordState getFirst() {
for(String sampleName: sampleNames) {
PerSampleReadStateManager reads = readStatesBySample.get(sampleName);
for(Sample sample: samples) {
PerSampleReadStateManager reads = readStatesBySample.get(sample);
if(!reads.isEmpty())
return reads.peek();
}
@ -626,12 +635,12 @@ public class LocusIteratorByState extends LocusIterator {
}
samplePartitioner.complete();
for(String sampleName: sampleNames) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sampleName);
for(Sample sample: samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample.getId());
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
PerSampleReadStateManager statesBySample = readStatesBySample.get(sampleName);
PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
int numReads = statesBySample.size();
int downsamplingExtent = aggregator.getDownsamplingExtent();
@ -998,6 +1007,9 @@ class NRandomReadSelector implements ReadSelector {
}
}
/**
* Note: stores reads by sample ID string, not by sample object
*/
class SamplePartitioner implements ReadSelector {
private final Map<String,ReadSelector> readsBySample;
private long readsSeen = 0;

View File

@ -211,9 +211,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
Map<String, StratifiedAlignmentContext> stratifiedContexts;
if ( BaseUtils.simpleBaseToBaseIndex(ref.getBase()) != -1 ) {
if ( ! context.hasExtendedEventPileup() ) {
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup(), ASSUME_SINGLE_SAMPLE);
stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(context.getBasePileup(), ASSUME_SINGLE_SAMPLE);
} else {
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getExtendedEventPileup(), ASSUME_SINGLE_SAMPLE);
stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(context.getExtendedEventPileup(), ASSUME_SINGLE_SAMPLE);
}
if ( stratifiedContexts != null ) {
annotatedVCs = engine.annotateContext(tracker, ref, stratifiedContexts, vc);

View File

@ -249,7 +249,7 @@ public class GenomicAnnotator extends RodWalker<Integer, Integer> implements Tre
(vc.isVariant() && !vc.isBiallelic()) ) {
results.add(vc);
} else {
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(context.getBasePileup());
if ( stratifiedContexts != null )
results.addAll(engine.annotateContext(tracker, ref, stratifiedContexts, vc));
else

View File

@ -150,7 +150,7 @@ public class UnifiedGenotyperEngine {
return null;
// stratify the AlignmentContext and cut by sample
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE);
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
if ( stratifiedContexts == null )
return null;
@ -177,7 +177,7 @@ public class UnifiedGenotyperEngine {
return null;
// stratify the AlignmentContext and cut by sample
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE);
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
if ( stratifiedContexts == null )
return null;
@ -187,7 +187,7 @@ public class UnifiedGenotyperEngine {
// annotate the call, if possible
if ( call != null && call.vc != null && annotationEngine != null ) {
// first off, we want to use the *unfiltered* context for the annotations
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE);
stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE);
Collection<VariantContext> variantContexts = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, call.vc);
call.vc = variantContexts.iterator().next(); //We know the collection will always have exactly 1 element.

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
@ -1025,14 +1026,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
// filter the read-base pileup based on min base and mapping qualities:
pileup = pileup.getBaseAndMappingFilteredPileup(MIN_BASE_QUALITY_SCORE, MIN_MAPPING_QUALITY_SCORE);
if (pileup != null) {
for (String samp : pileup.getSamples()) {
ReadBackedPileup samplePileup = pileup.getPileupForSample(samp);
for (Sample sample : pileup.getSamples()) {
ReadBackedPileup samplePileup = pileup.getPileupForSample(sample);
ReadBasesAtPosition readBases = new ReadBasesAtPosition();
for (PileupElement p : samplePileup) {
if (!p.isDeletion()) // IGNORE deletions for now
readBases.putReadBase(p);
}
sampleReadBases.put(samp, readBases);
sampleReadBases.put(sample.getId(), readBases);
}
}
}

View File

@ -70,7 +70,7 @@ public class AlleleBalanceHistogramWalker extends LocusWalker<Map<String,Double>
}
private HashMap<String,Double> getAlleleBalanceBySample(VariantContext vc, ReferenceContext ref, AlignmentContext context) {
Map<String, StratifiedAlignmentContext> sampleContext = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup(),null);
Map<String, StratifiedAlignmentContext> sampleContext = StratifiedAlignmentContext.splitContextBySampleName(context.getBasePileup(),null);
HashMap<String,Double> balances = new HashMap<String,Double>();
System.out.println("----- "+ref.getLocus()+" -----");
int returnedBalances = 0;

View File

@ -467,7 +467,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
throw new ReviewedStingException("Parental bases have length zero at "+trio.toString());
}
Map<String,StratifiedAlignmentContext> splitContext = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
Map<String,StratifiedAlignmentContext> splitContext = StratifiedAlignmentContext.splitContextBySampleName(context.getBasePileup());
Double proportion = getAlleleProportion(parental,splitContext.get(trioStructure.child));
if ( proportion != null ) {
violation.addAttribute(MendelianInfoKey.ProportionOfParentAllele.getKey(), proportion);
@ -501,7 +501,7 @@ public class MendelianViolationClassifier extends LocusWalker<MendelianViolation
// look for tri-allelic sites mis-called as hom -- as a speedup we do this only at non-filtered, non genotype error sites
if ( ! trio.isFiltered() ) {
Map<String,StratifiedAlignmentContext> splitCon = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup());
Map<String,StratifiedAlignmentContext> splitCon = StratifiedAlignmentContext.splitContextBySampleName(context.getBasePileup());
Pair<Allele,Integer> triAl = getTriAllelicQuality(tracker, ref, trio, splitCon);
if ( triAl != null ) {
violation.addAttribute(MendelianInfoKey.TriAllelicBase.getKey(),triAl.first.toString());

View File

@ -300,7 +300,7 @@ public class UnifiedGenotyperEngine {
else if (rawContext.hasBasePileup())
pileup = rawContext.getBasePileup();
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE);
stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
Collection<VariantContext> variantContexts = annotationEngine.annotateContext(tracker, refContext, stratifiedContexts, vc);
vc = variantContexts.iterator().next(); //We know the collection will always have exactly 1 element.
@ -330,7 +330,7 @@ public class UnifiedGenotyperEngine {
return null;
// stratify the AlignmentContext and cut by sample
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE);
stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(pileup, UAC.ASSUME_SINGLE_SAMPLE);
} else if ( UAC.GLmodel == GenotypeLikelihoodsCalculationModel.Model.SNP && !rawContext.hasExtendedEventPileup() ) {
@ -339,7 +339,7 @@ public class UnifiedGenotyperEngine {
return null;
// stratify the AlignmentContext and cut by sample
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE);
stratifiedContexts = StratifiedAlignmentContext.splitContextBySampleName(rawContext.getBasePileup(), UAC.ASSUME_SINGLE_SAMPLE);
// filter the reads (and test for bad pileups)
if ( !filterPileup(stratifiedContexts, badBaseFilter) )

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.BaseUtils;
@ -32,6 +33,7 @@ import org.broadinstitute.sting.gatk.iterators.IterableIterator;
import java.util.*;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.StingException;
/**
* A generic implementation of read-backed pileups.
@ -113,10 +115,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
calculateCachedData();
}
protected AbstractReadBackedPileup(GenomeLoc loc, Map<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
protected AbstractReadBackedPileup(GenomeLoc loc, Map<Sample,? extends AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
this.loc = loc;
PerSamplePileupElementTracker<PE> tracker = new PerSamplePileupElementTracker<PE>();
for(Map.Entry<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupEntry: pileupsBySample.entrySet()) {
for(Map.Entry<Sample,? extends AbstractReadBackedPileup<RBP,PE>> pileupEntry: pileupsBySample.entrySet()) {
tracker.addElements(pileupEntry.getKey(),pileupEntry.getValue().pileupElementTracker);
addPileupToCumulativeStats(pileupEntry.getValue());
}
@ -212,10 +214,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutDeletions();
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
@ -250,10 +252,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getOverlappingFragmentFilteredPileup();
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
}
@ -304,10 +306,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutMappingQualityZeroReads();
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
@ -333,10 +335,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPositiveStrandPileup();
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
}
@ -362,10 +364,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getNegativeStrandPileup();
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
}
@ -392,10 +394,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getFilteredPileup(filter);
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
@ -424,10 +426,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ,minMapQ);
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
@ -491,11 +493,11 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForReadGroup(targetReadGroupId);
if(pileup != null)
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
return (RBP)createNewPileup(loc,filteredTracker);
@ -517,10 +519,14 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
public Collection<String> getSamples() {
public Collection<String> getSampleNames() {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
return tracker.getSamples();
Collection<String> sampleNames = new HashSet<String>();
for (Sample sample : tracker.getSamples()) {
sampleNames.add(sample.getId());
}
return sampleNames;
}
else {
Collection<String> sampleNames = new HashSet<String>();
@ -533,6 +539,16 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
@Override
public Collection<Sample> getSamples() {
if(!(pileupElementTracker instanceof PerSamplePileupElementTracker)) {
throw new StingException("Must be an instance of PerSampleElementTracker");
}
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
return tracker.getSamples();
}
/**
* Returns a pileup randomly downsampled to the desiredCoverage.
*
@ -558,8 +574,8 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
int current = 0;
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for(PileupElement p: perSampleElements) {
@ -569,7 +585,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
if(!filteredPileup.isEmpty()) {
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements);
filteredTracker.addElements(sampleName,pileup.pileupElementTracker);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
}
current++;
@ -593,7 +609,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
@Override
public RBP getPileupForSample(String sampleName) {
public RBP getPileupForSampleName(String sampleName) {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
return (RBP)createNewPileup(loc,tracker.getElements(sampleName));
@ -615,6 +631,29 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
@Override
public RBP getPileupForSample(Sample sample) {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
return (RBP)createNewPileup(loc,tracker.getElements(sample));
}
else {
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for(PE p: pileupElementTracker) {
SAMRecord read = p.getRead();
if(sample != null) {
if(read.getReadGroup() != null && sample.getId().equals(read.getReadGroup().getSample()))
filteredTracker.add(p);
}
else {
if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
filteredTracker.add(p);
}
}
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
}
}
// --------------------------------------------------------
//
// iterators
@ -693,8 +732,8 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
boolean hasSecondaryBases = false;
for(String sampleName: tracker.getSamples()) {
hasSecondaryBases |= createNewPileup(loc,tracker.getElements(sampleName)).hasSecondaryBases();
for(Sample sample: tracker.getSamples()) {
hasSecondaryBases |= createNewPileup(loc,tracker.getElements(sample)).hasSecondaryBases();
}
return hasSecondaryBases;
@ -722,8 +761,8 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
for(String sampleName: tracker.getSamples()) {
int[] countsBySample = createNewPileup(loc,tracker.getElements(sampleName)).getBaseCounts();
for(Sample sample: tracker.getSamples()) {
int[] countsBySample = createNewPileup(loc,tracker.getElements(sample)).getBaseCounts();
for(int i = 0; i < counts.length; i++)
counts[i] += countsBySample[i];
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import java.util.PriorityQueue;
import java.util.Comparator;
@ -41,8 +42,8 @@ class MergingPileupElementIterator<PE extends PileupElement> implements Iterator
public MergingPileupElementIterator(PerSamplePileupElementTracker<PE> tracker) {
perSampleIterators = new PriorityQueue<PeekableIterator<PE>>(tracker.getSamples().size(),new PileupElementIteratorComparator());
for(String sampleName: tracker.getSamples()) {
PileupElementTracker<PE> trackerPerSample = tracker.getElements(sampleName);
for(Sample sample: tracker.getSamples()) {
PileupElementTracker<PE> trackerPerSample = tracker.getElements(sample);
if(trackerPerSample.size() != 0)
perSampleIterators.add(new PeekableIterator<PE>(trackerPerSample.iterator()));
}

View File

@ -24,6 +24,8 @@
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import java.util.*;
/**
@ -58,19 +60,21 @@ class UnifiedPileupElementTracker<PE extends PileupElement> extends PileupElemen
}
class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
private final Map<String,PileupElementTracker<PE>> pileup;
private final Map<Sample,PileupElementTracker<PE>> pileup;
private final Map<String, Sample> sampleNames = new HashMap<String, Sample>();
private int size = 0;
public PerSamplePileupElementTracker() {
pileup = new HashMap<String,PileupElementTracker<PE>>();
pileup = new HashMap<Sample,PileupElementTracker<PE>>();
}
public PerSamplePileupElementTracker(Map<String,AbstractReadBackedPileup<?,PE>> pileupsBySample) {
pileup = new HashMap<String,PileupElementTracker<PE>>();
for(Map.Entry<String,AbstractReadBackedPileup<?,PE>> entry: pileupsBySample.entrySet()) {
String sampleName = entry.getKey();
public PerSamplePileupElementTracker(Map<Sample,AbstractReadBackedPileup<?,PE>> pileupsBySample) {
pileup = new HashMap<Sample,PileupElementTracker<PE>>();
for(Map.Entry<Sample,AbstractReadBackedPileup<?,PE>> entry: pileupsBySample.entrySet()) {
Sample sample = entry.getKey();
AbstractReadBackedPileup<?,PE> pileupBySample = entry.getValue();
pileup.put(sampleName,pileupBySample.pileupElementTracker);
pileup.put(sample,pileupBySample.pileupElementTracker);
sampleNames.put(sample.getId(), sample);
}
}
@ -78,16 +82,21 @@ class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElem
* Gets a list of all the samples stored in this pileup.
* @return List of samples in this pileup.
*/
public Collection<String> getSamples() {
public Collection<Sample> getSamples() {
return pileup.keySet();
}
public PileupElementTracker<PE> getElements(final String sample) {
public PileupElementTracker<PE> getElements(final Sample sample) {
return pileup.get(sample);
}
public void addElements(final String sample, PileupElementTracker<PE> elements) {
public PileupElementTracker<PE> getElements(final String sampleName) {
return pileup.get(sampleNames.get(sampleName));
}
public void addElements(final Sample sample, PileupElementTracker<PE> elements) {
pileup.put(sample,elements);
sampleNames.put(sample.getId(), sample);
size += elements.size();
}

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
@ -122,14 +123,20 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
* Gets the names of all the samples stored in this pileup.
* @return Names of the samples in this pileup.
*/
public Collection<String> getSamples();
public Collection<String> getSampleNames();
/**
* Gets a list of all the samples stored in this pileup.
* @return List of samples in this pileup.
*/
public Collection<Sample> getSamples();
/**
* Gets the particular subset of this pileup with the given sample name.
* @param sampleName Name of the sample to use.
* @param sample The sample whose reads should be selected.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedExtendedEventPileup getPileupForSample(String sampleName);
public ReadBackedExtendedEventPileup getPileupForSample(Sample sample);
public Iterable<ExtendedEventPileupElement> toExtendedIterable();
@ -212,5 +219,5 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
* Get an array of the mapping qualities
* @return
*/
public byte[] getMappingQuals();
public byte[] getMappingQuals();
}

View File

@ -23,6 +23,7 @@
*/
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
@ -55,7 +56,8 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
this.nInsertions = nInsertions;
}
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
// Preferred constructor: builds an extended-event pileup from per-Sample sub-pileups.
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<Sample,? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
super(loc,pileupElementsBySample);
}
@ -221,5 +223,5 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
} else {
return "-"+new String(refBases,1,length).toUpperCase();
}
}
}
}

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.iterators.IterableIterator;
import net.sf.samtools.SAMRecord;
@ -128,18 +129,31 @@ public interface ReadBackedPileup extends Iterable<PileupElement> {
*/
public ReadBackedPileup getPileupForReadGroup(String readGroupId);
/**
/**
* Gets a collection of all the samples stored in this pileup.
* @return Collection of samples in this pileup.
*/
public Collection<String> getSamples();
public Collection<Sample> getSamples();
/**
* Gets a collection of *names* of all the samples stored in this pileup.
* @return Collection of sample names in this pileup.
*/
public Collection<String> getSampleNames();
/**
* Gets the particular subset of this pileup with the given sample name.
* @param sampleName Name of the sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedPileup getPileupForSample(String sampleName);
public ReadBackedPileup getPileupForSampleName(String sampleName);
/**
* Gets the particular subset of this pileup with the given sample.
* @param sample Sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedPileup getPileupForSample(Sample sample);
/**
* Simple useful routine to count the number of deletion bases in this pileup

View File

@ -23,6 +23,7 @@
*/
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.SAMRecord;
@ -47,7 +48,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
super(loc,pileupElements);
}
public ReadBackedPileupImpl(GenomeLoc loc, Map<String,ReadBackedPileupImpl> pileupElementsBySample) {
public ReadBackedPileupImpl(GenomeLoc loc, Map<Sample,ReadBackedPileupImpl> pileupElementsBySample) {
super(loc,pileupElementsBySample);
}

View File

@ -7,6 +7,7 @@ import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.MockLocusShard;
@ -52,7 +53,7 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(new SAMDataSource(Collections.<SAMReaderID>emptyList(),genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null);
@ -68,7 +69,7 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(shardBounds));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
@ -83,7 +84,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -97,7 +98,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -111,7 +112,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -125,7 +126,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -139,7 +140,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -154,7 +155,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -173,7 +174,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -192,7 +193,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -213,7 +214,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -241,7 +242,7 @@ public abstract class LocusViewTemplate extends BaseTest {
read07, read08, read09, read10, read11, read12);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS);
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),LocusIteratorByState.NO_FILTERS, new SampleDataSource());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);

View File

@ -39,7 +39,7 @@ public class SampleDataSourceUnitTest extends BaseTest {
public void basicLoadSampleFileTest() {
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
Assert.assertTrue(s.sampleCount() == 4);
Assert.assertTrue(s.sampleCount() == 5);
Sample sampleA = s.getSampleById("sampleA");
Sample sampleB = s.getSampleById("sampleB");
Assert.assertTrue(sampleB.getMother() == sampleA);
@ -78,7 +78,7 @@ public class SampleDataSourceUnitTest extends BaseTest {
File sampleFile = new File(sampleFilesDir + "basicSampleFileWithAlias.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
// this file has two samples, but one has an alias. let's make sure that checks out...
Assert.assertTrue(s.sampleCount() == 2);
Assert.assertTrue(s.sampleCount() == 3);
Assert.assertTrue(s.getSampleById("sampleA") == s.getSampleById("sampleC"));
}
@ -129,20 +129,21 @@ public class SampleDataSourceUnitTest extends BaseTest {
files.add(new File(sampleFilesDir + "basicSampleFileExt.yaml"));
files.add(new File(sampleFilesDir + "basicSampleFileExt2.yaml"));
SampleDataSource s = new SampleDataSource(header, files);
Assert.assertTrue(s.sampleCount() == 5);
Assert.assertTrue(s.sampleCount() == 6);
Assert.assertTrue(s.getSampleById("sampleE").getProperty("propC").equals("valC"));
Assert.assertTrue(s.getSampleById("sampleA").getProperty("propA").equals("valA"));
}
/**
* testing getSamplesWithProperty
* in this file there are 4 samples - 2 with population "CEU", 1 with population "ABC", 1 with no population
* in this file there are 5 samples - 2 with population "CEU", 1 with population "ABC", 1 with no population,
* and then the default null sample
*/
@Test()
public void getSamplesWithPropertyTest() {
File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
Assert.assertTrue(s.sampleCount() == 4);
Assert.assertTrue(s.sampleCount() == 5);
Set<Sample> ceuSamples = s.getSamplesWithProperty("population", "CEU");
Assert.assertTrue(ceuSamples.size() == 2);
@ -209,7 +210,7 @@ public class SampleDataSourceUnitTest extends BaseTest {
File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
Assert.assertTrue(s.sampleCount() == 4);
Assert.assertTrue(s.sampleCount() == 5);
List<Allele> alleleCollection = new ArrayList<Allele>();
Allele a1 = Allele.create("A", true);

View File

@ -9,6 +9,7 @@ import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMReaderID;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -63,7 +64,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser);
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser, new SampleDataSource());
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {
@ -115,7 +116,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser);
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser, new SampleDataSource());
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {

View File

@ -28,6 +28,7 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.Test;
@ -143,9 +144,9 @@ public class ReadBackedPileupUnitTest {
ReadBackedPileupImpl sample2Pileup = new ReadBackedPileupImpl(null,
Arrays.asList(read2,read4),
Arrays.asList(1,1));
Map<String,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String,ReadBackedPileupImpl>();
sampleToPileupMap.put(readGroupOne.getSample(),sample1Pileup);
sampleToPileupMap.put(readGroupTwo.getSample(),sample2Pileup);
Map<Sample,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<Sample,ReadBackedPileupImpl>();
sampleToPileupMap.put(new Sample(readGroupOne.getSample()),sample1Pileup);
sampleToPileupMap.put(new Sample(readGroupTwo.getSample()),sample2Pileup);
ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null,sampleToPileupMap);