Oops...forgot to commit the changes that allow primitive VCF streaming.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4979 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8a6b126ea8
commit
6d855041ec
|
|
@ -243,7 +243,14 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
for (String fileName: argCollection.RODBindings) {
|
||||
List<String> parameters = parser.getTags(fileName);
|
||||
fileName = expandFileName(fileName);
|
||||
RMDStorageType storageType = fileName.toLowerCase().endsWith("stdin") ? RMDStorageType.STREAM : RMDStorageType.FILE;
|
||||
|
||||
RMDStorageType storageType = null;
|
||||
if(argCollection.rodInputType != null)
|
||||
storageType = argCollection.rodInputType;
|
||||
else if(fileName.toLowerCase().endsWith("stdin"))
|
||||
storageType = RMDStorageType.STREAM;
|
||||
else
|
||||
storageType = RMDStorageType.FILE;
|
||||
|
||||
if(parameters.size() != 2)
|
||||
throw new UserException("Invalid syntax for -B (reference-ordered data) input flag. " +
|
||||
|
|
|
|||
|
|
@ -26,7 +26,9 @@
|
|||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -210,6 +212,11 @@ public class GATKArgumentCollection {
|
|||
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
||||
public List<String> readGroupBlackList = null;
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName="rod_input_type",shortName="rit",doc="Indicates whether to use a file approach or a streaming approach to loading ROD data",required=false)
|
||||
@Hidden
|
||||
public RMDTriplet.RMDStorageType rodInputType = null;
|
||||
|
||||
/**
|
||||
* marshal the data out to an object
|
||||
*
|
||||
|
|
@ -371,6 +378,9 @@ public class GATKArgumentCollection {
|
|||
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog)))
|
||||
return false;
|
||||
|
||||
if(rodInputType != other.rodInputType)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -202,10 +202,8 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
|||
this.addNewResource(iterator);
|
||||
|
||||
// Pull the proper header and sequence dictionary from the prepopulated track.
|
||||
//this.header = iterator.getHeader();
|
||||
//this.sequenceDictionary = iterator.getSequenceDictionary();
|
||||
this.header = null;
|
||||
this.sequenceDictionary = null;
|
||||
this.header = iterator.getHeader();
|
||||
this.sequenceDictionary = iterator.getSequenceDictionary();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -232,7 +230,8 @@ class ReferenceOrderedDataPool extends ResourcePool<LocationAwareSeekableRODIter
|
|||
public LocationAwareSeekableRODIterator createNewResource() {
|
||||
if(numIterators() > 0)
|
||||
throw new ReviewedStingException("BUG: Tried to create multiple iterators over streaming ROD interface");
|
||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,builder.createInstanceOfTrack(fileDescriptor).getIterator());
|
||||
RMDTrack track = builder.createInstanceOfTrack(fileDescriptor);
|
||||
LocationAwareSeekableRODIterator iter = new SeekableRODIterator(track.getHeader(),track.getSequenceDictionary(),referenceSequenceDictionary,genomeLocParser,track.getIterator());
|
||||
return (flashbackData) ? new FlashBackIterator(iter) : iter;
|
||||
}
|
||||
|
||||
|
|
@ -344,9 +343,9 @@ class ReferenceOrderedQueryDataPool extends ResourcePool<RMDTrack,LocationAwareS
|
|||
try {
|
||||
if (position instanceof MappedStreamSegment) {
|
||||
GenomeLoc pos = ((MappedStreamSegment) position).locus;
|
||||
return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.query(pos));
|
||||
return new SeekableRODIterator(header,sequenceDictionary,referenceSequenceDictionary,genomeLocParser,track.query(pos));
|
||||
} else {
|
||||
return new SeekableRODIterator(referenceSequenceDictionary,genomeLocParser,track.getIterator());
|
||||
return new SeekableRODIterator(header,sequenceDictionary,referenceSequenceDictionary,genomeLocParser,track.getIterator());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new ReviewedStingException("Unable to create iterator for rod named " + fileDescriptor.getName(),e);
|
||||
|
|
|
|||
|
|
@ -39,11 +39,18 @@ import java.util.List;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
||||
/**
|
||||
* Header for the datasource backing this iterator.
|
||||
*/
|
||||
private final Object header;
|
||||
|
||||
/**
|
||||
* The parser, used to construct new genome locs.
|
||||
*/
|
||||
private final GenomeLocParser parser;
|
||||
|
||||
private final SAMSequenceDictionary sequenceDictionary;
|
||||
|
||||
private PushbackIterator<GATKFeature> it;
|
||||
List<GATKFeature> records = null; // here we will keep a pile of records overlapping with current position; when we iterate
|
||||
// and step out of record's scope, we purge it from the list
|
||||
|
|
@ -86,8 +93,10 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
|||
// This implementation tracks the query history and makes next() illegal after a seekforward query of length > 1,
|
||||
// but re-enables next() again after a length-1 query.
|
||||
|
||||
public SeekableRODIterator(SAMSequenceDictionary dictionary,GenomeLocParser parser,CloseableIterator<GATKFeature> it) {
|
||||
public SeekableRODIterator(Object header,SAMSequenceDictionary rodDictionary,SAMSequenceDictionary referenceDictionary,GenomeLocParser parser,CloseableIterator<GATKFeature> it) {
|
||||
this.header = header;
|
||||
this.parser = parser;
|
||||
this.sequenceDictionary = rodDictionary;
|
||||
this.it = new PushbackIterator<GATKFeature>(it);
|
||||
records = new LinkedList<GATKFeature>();
|
||||
// the following is a trick: we would like the iterator to know the actual name assigned to
|
||||
|
|
@ -99,9 +108,28 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
|
|||
if (this.it.hasNext()) r = this.it.element();
|
||||
name = (r==null?null:r.getName());
|
||||
|
||||
curr_contig = dictionary.getSequence(0).getSequenceName();
|
||||
curr_contig = referenceDictionary.getSequence(0).getSequenceName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header associated with the backing input stream.
|
||||
* @return the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public Object getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the sequence dictionary associated with the backing input stream.
|
||||
* @return sequence dictionary from the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return sequenceDictionary;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns true if the data we iterate over has records associated with (any, not necessarily adjacent)
|
||||
* genomic position farther along the reference.
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.Comparator;
|
||||
|
|
@ -56,6 +57,25 @@ public class FlashBackIterator implements LocationAwareSeekableRODIterator {
|
|||
this.iterator = iterator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header associated with the backing input stream.
|
||||
* @return the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public Object getHeader() {
|
||||
return iterator.getHeader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the sequence dictionary associated with the backing input stream.
|
||||
* @return sequence dictionary from the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return iterator.getSequenceDictionary();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* peek at the next location
|
||||
* @return
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -15,6 +16,10 @@ import java.util.List;
|
|||
* combine iteration with a position aware interface
|
||||
*/
|
||||
public interface LocationAwareSeekableRODIterator extends CloseableIterator<RODRecordList> {
|
||||
public Object getHeader();
|
||||
|
||||
public SAMSequenceDictionary getSequenceDictionary();
|
||||
|
||||
public GenomeLoc peekNextLocation();
|
||||
|
||||
public GenomeLoc position();
|
||||
|
|
|
|||
|
|
@ -406,7 +406,7 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
|
|||
getToolkit().getGenomeLocParser(),
|
||||
getToolkit().getArguments().unsafe);
|
||||
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,refSeqGeneList);
|
||||
return new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
return new SeekableRODIterator(refseq.getHeader(),refseq.getSequenceDictionary(),getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),refseq.getIterator());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -252,7 +252,9 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
|
|||
getToolkit().getArguments().unsafe);
|
||||
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName));
|
||||
|
||||
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
refseqIterator = new SeekableRODIterator(refseq.getHeader(),
|
||||
refseq.getSequenceDictionary(),
|
||||
getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
refseq.getIterator());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
|
|
@ -87,9 +88,13 @@ public class PickSequenomProbes extends RodWalker<String, String> {
|
|||
ReferenceOrderedData snp_mask;
|
||||
if ( SNP_MASK.contains(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)) {
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),getToolkit().getArguments().unsafe);
|
||||
CloseableIterator<GATKFeature> iter = builder.createInstanceOfTrack(DbSNPCodec.class,new java.io.File(SNP_MASK)).getIterator();
|
||||
snpMaskIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),getToolkit().getGenomeLocParser(),iter);
|
||||
|
||||
RMDTrack track = builder.createInstanceOfTrack(DbSNPCodec.class,new java.io.File(SNP_MASK));
|
||||
snpMaskIterator = new SeekableRODIterator(track.getHeader(),
|
||||
track.getSequenceDictionary(),
|
||||
getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
track.getIterator());
|
||||
|
||||
} else {
|
||||
// TODO: fix me when Plink is back
|
||||
throw new IllegalArgumentException("We currently do not support other snp_mask tracks (like Plink)");
|
||||
|
|
|
|||
|
|
@ -38,8 +38,11 @@ public class IndelAnnotator extends RodWalker<Integer,Long> {
|
|||
getToolkit().getArguments().unsafe);
|
||||
RMDTrack refseq = builder.createInstanceOfTrack(RefSeqCodec.class,new File(RefseqFileName));
|
||||
|
||||
refseqIterator = new SeekableRODIterator(getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),refseq.getIterator());
|
||||
refseqIterator = new SeekableRODIterator(refseq.getHeader(),
|
||||
refseq.getSequenceDictionary(),
|
||||
getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
refseq.getIterator());
|
||||
|
||||
logger.info("Using RefSeq annotations from " + RefseqFileName);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
|
|||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
|
|
@ -109,6 +110,25 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
|
|||
this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header associated with the backing input stream.
|
||||
* @return the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public Object getHeader() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the sequence dictionary associated with the backing input stream.
|
||||
* @return sequence dictionary from the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public GenomeLoc peekNextLocation() {
|
||||
System.err.println("Peek Next -> " + location);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.testng.Assert;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
|
|
@ -157,6 +158,25 @@ class FakeSeekableRODIterator implements LocationAwareSeekableRODIterator {
|
|||
this.location = genomeLocParser.createGenomeLoc(startingLoc.getContig(), startingLoc.getStart() + 1, startingLoc.getStop() + 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header associated with the backing input stream.
|
||||
* @return the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public Object getHeader() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the sequence dictionary associated with the backing input stream.
|
||||
* @return sequence dictionary from the ROD header.
|
||||
*/
|
||||
@Override
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public GenomeLoc peekNextLocation() {
|
||||
System.err.println("Peek Next -> " + location);
|
||||
|
|
|
|||
Loading…
Reference in New Issue