Adding changes so that we look at the headers already loaded by the engine for samples and other VCF utils, instead of creating a new reader for each file to get them (this caused Tribble to regenerate indices whenever the index file couldn't be written to disk).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3518 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-06-09 17:21:12 +00:00
parent c1b7bcc786
commit ad98512f6c
13 changed files with 74 additions and 40 deletions

View File

@ -69,6 +69,11 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
return AnnotatorInputTableFeature.class;
}
/**
 * Returns the header for this codec, typed as the requested class.
 * This implementation does not expose a header and always returns null.
 *
 * @param clazz the class of the expected header type (unused here)
 * @param <HeaderType> the expected header type
 * @return always null
 * @throws ClassCastException declared by the signature; never thrown by this implementation
 */
@Override
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return null; // TODO: do we want the header to be a concrete type?
}
/**
* Parses the line into an AnnotatorInputTableFeature object.

View File

@ -71,6 +71,11 @@ public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> {
return SAMPileupFeature.class;
}
/**
 * Returns the header for this codec, typed as the requested class.
 * This implementation has no header and always returns null.
 *
 * @param clazz the class of the expected header type (unused here)
 * @param <HeaderType> the expected header type
 * @return always null
 * @throws ClassCastException declared by the signature; never thrown by this implementation
 */
@Override
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return null; // we don't have a header
}
public SAMPileupFeature decode(String line) {
// 0 1 2 3 4 5 6 7
//* chrX 466 T Y 170 170 88 32 ... (piles of read bases and quals follow)

View File

@ -61,6 +61,11 @@ public class SAMReadCodec implements FeatureCodec<SAMReadFeature> {
return SAMReadFeature.class;
}
/**
 * Returns the header for this codec, typed as the requested class.
 * This implementation does not keep a header and always returns null.
 *
 * @param clazz the class of the expected header type (unused here)
 * @param <HeaderType> the expected header type
 * @return always null
 * @throws ClassCastException declared by the signature; never thrown by this implementation
 */
@Override
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return null; // we haven't stored the header
}
/**
* Decode a single line in a SAM text file.
* @param line line to decode.

View File

@ -280,4 +280,9 @@ public class VCF4Codec implements FeatureCodec {
public Class getFeatureType() {
return VariantContext.class;
}
/**
 * Returns the header for this codec. Not implemented yet: always returns null,
 * so callers must be prepared for a missing header.
 *
 * @param clazz the class of the expected header type (unused here)
 * @return always null
 * @throws ClassCastException declared by the signature; never thrown by this implementation
 */
@Override
public Object getHeader(Class clazz) throws ClassCastException {
return null; // TODO: fix this Aaron
}
}

View File

@ -84,18 +84,6 @@ public abstract class RMDTrack {
*/
public abstract CloseableIterator<GATKFeature> getIterator();
/**
 * helper function for determining if we are the same track based on name and codec type
 *
 * @param name the name to match
 * @param type the type to match
 *
 * @return true on a match, false if the name or type is different
 */
public boolean matchesNameAndType(String name, Type type) {
// NOTE(review): getClass() is called on both Type objects, so this compares the runtime
// classes of the two objects rather than the types they describe -- confirm this is intended.
return (name.equals(this.name) && (type.getClass().isAssignableFrom(this.type.getClass())));
}
/**
* helper function for determining if we are the same track based on name and record type
*
@ -121,4 +109,15 @@ public abstract class RMDTrack {
public SAMSequenceDictionary getSequenceDictionary() {
return null; // default, others can override this
}
/**
 * ask for the header, supplying the expected type. Overridden in track types
 * that actually carry a header; this default implementation always returns null.
 *
 * @param clazz the class of the expected type
 * @param <HeaderType> the expected type
 * @return a object of type HeaderType; null in this default implementation
 * @throws ClassCastException if the class provided doesn't match our header type
 *         (never thrown by this default implementation)
 */
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
return null; // default: no header available
}
}

View File

@ -130,4 +130,15 @@ public class TribbleTrack extends RMDTrack implements QueryableTrack {
public SAMSequenceDictionary getSequenceDictionary() {
return dictionary;
}
/**
 * Ask the underlying reader for its header, supplying the expected type.
 *
 * @param clazz the class of the expected type
 * @param <HeaderType> the expected type
 * @return the reader's header as a HeaderType, or null if the reader returns no header
 * @throws ClassCastException if the reader's header is not an instance of the class provided
 */
public <HeaderType> HeaderType getHeader(Class<HeaderType> clazz) throws ClassCastException {
// Class.cast checks the type at runtime; a plain (HeaderType) cast is erased, so a
// mismatched header would slip through and only fail later at the caller's use site.
return clazz.cast(reader.getHeader(clazz));
}
}

View File

@ -92,6 +92,9 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
@Argument(fullName="quality_step", shortName="qStep", doc="Resolution in QUAL units for optimization and tranche calculations", required=false)
private double QUAL_STEP = 0.1;
// TODO: RYAN - remove me, even though this switch is apparently super awesome
private final static boolean AARONS_SUPER_AWESOME_SWITCH = true;
/////////////////////////////
// Private Member Variables
/////////////////////////////
@ -131,21 +134,30 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// setup the header fields
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "The original variant quality score"));
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
vcfWriter = new VCFWriter( new File(OUTPUT_PREFIX + ".vcf") );
final TreeSet<String> samples = new TreeSet<String>();
final List<ReferenceOrderedDataSource> dataSources = this.getToolkit().getRodDataSources();
for( final ReferenceOrderedDataSource source : dataSources ) {
final RMDTrack rod = source.getReferenceOrderedData();
if( rod.getRecordType().equals(VCFRecord.class) ) {
final VCFReader reader = new VCFReader(rod.getFile());
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
samples.addAll(vcfSamples);
reader.close();
if (AARONS_SUPER_AWESOME_SWITCH) {
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "The original variant quality score"));
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit()));
} else {
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.add(new VCFInfoHeaderLine("OQ", 1, VCFInfoHeaderLine.INFO_TYPE.Float, "The original variant quality score"));
hInfo.add(new VCFHeaderLine("source", "VariantOptimizer"));
for( final ReferenceOrderedDataSource source : dataSources ) {
final RMDTrack rod = source.getReferenceOrderedData();
if( rod.getRecordType().equals(VCFRecord.class) ) {
final VCFReader reader = new VCFReader(rod.getFile());
final Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
samples.addAll(vcfSamples);
reader.close();
}
}
}
vcfWriter = new VCFWriter( new File(OUTPUT_PREFIX + ".vcf") );
final VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);

View File

@ -27,13 +27,12 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import java.util.*;
@ -80,11 +79,8 @@ public class SampleUtils {
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getRecordType().equals(VCFRecord.class) ) {
VCFReader reader = new VCFReader(rod.getFile());
samples.addAll(reader.getHeader().getGenotypeSamples());
reader.close();
}
if ( rod.getRecordType().equals(VCFRecord.class) )
samples.addAll(rod.getHeader(VCFHeader.class).getGenotypeSamples());
}
return samples;
@ -110,11 +106,9 @@ public class SampleUtils {
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getRecordType().equals(VCFRecord.class) ) {
VCFReader reader = new VCFReader(rod.getFile());
Set<String> vcfSamples = reader.getHeader().getGenotypeSamples();
Set<String> vcfSamples = rod.getHeader(VCFHeader.class).getGenotypeSamples();
for ( String sample : vcfSamples )
addUniqueSample(samples, sampleOverlapMap, rodNamesToSampleNames, sample, rod.getName());
reader.close();
}
}
}

View File

@ -69,7 +69,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
} catch (IOException e) {
throw new StingException("Unable to read VCF File from " + vcfFile, e);
}
mHeader = codec.getHeader();
mHeader = codec.getHeader(VCFHeader.class);
}
/**

View File

@ -74,9 +74,7 @@ public class VCFUtils {
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getRecordType().equals(VCFRecord.class) ) {
VCFReader reader = new VCFReader(rod.getFile());
fields.addAll(reader.getHeader().getMetaData());
reader.close();
fields.addAll(rod.getHeader(VCFHeader.class).getMetaData());
}
}

View File

@ -0,0 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="100" status="integration" publication="201006091154200" />
</ivy-module>

View File

@ -1,3 +0,0 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="99" status="integration" publication="201006081154200" />
</ivy-module>