Oops...forgot to commit the changes that allow primitive VCF streaming.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4979 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-01-12 21:54:51 +00:00
parent 8a6b126ea8
commit 6d855041ec
12 changed files with 136 additions and 17 deletions

View File

@ -243,7 +243,14 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
for (String fileName: argCollection.RODBindings) {
List<String> parameters = parser.getTags(fileName);
fileName = expandFileName(fileName);
RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDStorageType.STREAM : RMDStorageType.FILE;
RMDStorageType storageType = null;
if(argCollection.rodInputType != null)
storageType = argCollection.rodInputType;
else if(fileName.toLowerCase().endsWith("stdin"))
storageType = RMDStorageType.STREAM;
else
storageType = RMDStorageType.FILE;
if(parameters.size() != 2)
throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " +

View File

@ -26,7 +26,9 @@
package org.broadinstitute.sting.gatk.arguments;
import net.sf.samtools.SAMFileReader;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.commandline.Argument;
@ -210,6 +212,11 @@ public class GATKArgumentCollection {
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
public List<String> readGroupBlackList = null;
@Element(required=false)
@Argument(fullName="rod_input_type",shortName="rit",doc="Indicates whether to use a file approach or a streaming approach to loading ROD data",required=false)
@Hidden
public RMDTriplet.RMDStorageType rodInputType = null;
/**
* marshal the data out to a object
*
@ -371,6 +378,9 @@ public class GATKArgumentCollection {
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog)))
return false;
if(rodInputType != other.rodInputType)
return false;
return true;
}

View File

@ -202,10 +202,8 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
this.addNewResource(iterator);
// Pull the proper header and sequence dictionary from the prepopulated track.
//this.header = iterator.getHeader();
//this.sequenceDictionary = iterator.getSequenceDictionary();
this.header = null;
this.sequenceDictionary = null;
this.header = iterator.getHeader();
this.sequenceDictionary = iterator.getSequenceDictionary();
}
/**
@ -232,7 +230,8 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
public LocationAwareSeekableRODIterator createNewResource() {
if(numIterators() > 0)
throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,builder.createInstanceOfTrack(fileDescriptor).getIterator());
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(track.getHeader(),track.getSequenceDictionary(),referenceSequenceDictionary,genomeLocParser,track.getIterator());
return (flashbackData) ? new FlashBackIterator(iter) : iter;
}
@ -344,9 +343,9 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<RMDTrack,LocationAwareS
try {
if (position instanceof MappedStreamSegment) {
GenomeLoc pos = ((MappedStreamSegment) position).locus;
return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.query(pos));
return new SeekableRODIterator(header,sequenceDictionary,referenceSequenceDictionary,genomeLocParser,track.query(pos));
} else {
return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator());
return new SeekableRODIterator(header,sequenceDictionary,referenceSequenceDictionary,genomeLocParser,track.getIterator());
}
} catch (IOException e) {
throw new ReviewedStingException("Unable to create iterator for rod named " + fileDescriptor.getName(),e);

View File

@ -39,11 +39,18 @@ import java.util.List;
* To change this template use File | Settings | File Templates.
*/
public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
/**
* Header for the datasource backing this iterator.
*/
private final Object header;
/**
* The parser, used to construct new genome locs.
*/
private final GenomeLocParser parser;
private final SAMSequenceDictionary sequenceDictionary;
private PushbackIterator<GATKFeature> it;
List<GATKFeature> records = null; // here we keep a pile of records overlapping the current position; when we iterate
// and step out of a record's scope, we purge that record from the list
@ -86,8 +93,10 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
// This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1,
// but re-enables next() again after a length-1 query.
public SeekableRODIterator(SAMSequenceDictionary dictionary,GenomeLocParser parser,CloseableIterator<GATKFeature> it) {
public SeekableRODIterator(Object header,SAMSequenceDictionary rodDictionary,SAMSequenceDictionary referenceDictionary,GenomeLocParser parser,CloseableIterator<GATKFeature> it) {
this.header = header;
this.parser = parser;
this.sequenceDictionary = rodDictionary;
this.it = new PushbackIterator<GATKFeature>(it);
records = new LinkedList<GATKFeature>();
// the following is a trick: we would like the iterator to know the actual name assigned to
@ -99,9 +108,28 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
if (this.it.hasNext()) r = this.it.element();
name = (r==null?null:r.getName());
curr_contig = dictionary.getSequence(0).getSequenceName();
curr_contig = referenceDictionary.getSequence(0).getSequenceName();
}
/**
* Gets the header associated with the backing input stream.
* The value is the header object that was handed to this iterator's constructor; it is stored
* at construction time and returned here unchanged.
* NOTE(review): the header is typed as Object, so the concrete type presumably depends on the
* format of the underlying ROD -- confirm against the track implementation.
* @return the ROD header supplied at construction.
*/
@Override
public Object getHeader() {
return header;
}
/**
* Gets the sequence dictionary associated with the backing input stream.
* This is the ROD's own dictionary (the constructor's rodDictionary argument), not the
* reference dictionary that is also passed to the constructor.
* @return sequence dictionary from the ROD header, as supplied at construction.
*/
@Override
public SAMSequenceDictionary getSequenceDictionary() {
return sequenceDictionary;
}
/**
* Returns true if the data we iterate over has records associated with (any, not necessarily adjacent)
* genomic position farther along the reference.

View File

@ -23,6 +23,7 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.Comparator;
@ -56,6 +57,25 @@ public class FlashBackIterator implements LocationAwareSeekableRODIterator {
this.iterator = iterator;
}
/**
* Gets the header associated with the backing input stream.
* This wrapper holds no header of its own; the call is forwarded to the wrapped iterator.
* @return the ROD header reported by the wrapped iterator.
*/
@Override
public Object getHeader() {
return iterator.getHeader();
}
/**
* Gets the sequence dictionary associated with the backing input stream.
* This wrapper holds no dictionary of its own; the call is forwarded to the wrapped iterator.
* @return the sequence dictionary reported by the wrapped iterator.
*/
@Override
public SAMSequenceDictionary getSequenceDictionary() {
return iterator.getSequenceDictionary();
}
/**
* peek at the next location
* @return

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -15,6 +16,10 @@ import java.util.List;
* combine iteration with a position aware interface
*/
public interface LocationAwareSeekableRODIterator extends CloseableIterator<RODRecordList> {
/**
* Gets the header associated with the data source backing this iterator.
* NOTE(review): at least some implementations return null here, so callers should presumably
* be prepared for a null header -- confirm before relying on a non-null value.
* @return the ROD header.
*/
public Object getHeader();
/**
* Gets the sequence dictionary associated with the data source backing this iterator.
* NOTE(review): at least some implementations return null here, so callers should presumably
* be prepared for a null dictionary -- confirm before relying on a non-null value.
* @return sequence dictionary from the ROD header.
*/
public SAMSequenceDictionary getSequenceDictionary();
public GenomeLoc peekNextLocation();
public GenomeLoc position();

View File

@ -406,7 +406,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
getToolkit().getGenomeLocParser(),
getToolkit().getArguments().unsafe);
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,refSeqGeneList);
return new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
return new SeekableRODIterator(refseq.getHeader(),refseq.getSequenceDictionary(),getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),refseq.getIterator());
}

View File

@ -252,7 +252,9 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
getToolkit().getArguments().unsafe);
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName));
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
refseqIterator = new SeekableRODIterator(refseq.getHeader(),
refseq.getSequenceDictionary(),
getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
refseq.getIterator());
}

View File

@ -32,6 +32,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@ -87,9 +88,13 @@ public class PickSequenomProbes extends RodWalker<String, String> {
ReferenceOrderedData snp_mask;
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe);
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPCodec.class,new java.io.File(SNP_MASK)).getIterator();
snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter);
RMDTrack track = builder.createInstanceOfTrack(DbSNPCodec.class,new java.io.File(SNP_MASK));
snpMaskIterator = new SeekableRODIterator(track.getHeader(),
track.getSequenceDictionary(),
getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
track.getIterator());
} else {
// TODO: fix me when Plink is back
throw new IllegalArgumentException("We currently do not support other snp_mask tracks (like Plink)");

View File

@ -38,8 +38,11 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
getToolkit().getArguments().unsafe);
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName));
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),refseq.getIterator());
refseqIterator = new SeekableRODIterator(refseq.getHeader(),
refseq.getSequenceDictionary(),
getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
getToolkit().getGenomeLocParser(),
refseq.getIterator());
logger.info("Using RefSeq annotations from " + RefseqFileName);
}

View File

@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
@ -109,6 +110,25 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1);
}
/**
* Gets the header associated with the backing input stream.
* This fake iterator has no header to report.
* @return always null.
*/
@Override
public Object getHeader() {
return null;
}
/**
* Gets the sequence dictionary associated with the backing input stream.
* This fake iterator has no sequence dictionary to report.
* @return always null.
*/
@Override
public SAMSequenceDictionary getSequenceDictionary() {
return null;
}
@Override
public GenomeLoc peekNextLocation() {
System.err.println("Peek Next -> " + location);

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceDictionary;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
@ -157,6 +158,25 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator {
this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1);
}
/**
* Gets the header associated with the backing input stream.
* This fake iterator has no header to report.
* @return always null.
*/
@Override
public Object getHeader() {
return null;
}
/**
* Gets the sequence dictionary associated with the backing input stream.
* This fake iterator has no sequence dictionary to report.
* @return always null.
*/
@Override
public SAMSequenceDictionary getSequenceDictionary() {
return null;
}
@Override
public GenomeLoc peekNextLocation() {
System.err.println("Peek Next -> " + location);