Starting the cleanup of the sting.utils.genotype code, all of which is either moving to Tribble, moving to sting.utils.vcf, or being removed.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3994 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-08-10 02:16:05 +00:00
parent 2a4a4b0aab
commit 419a36f74c
23 changed files with 29 additions and 182 deletions

View File

@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;

View File

@ -44,7 +44,7 @@ import org.broadinstitute.sting.utils.classloader.PackageUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.util.*;

View File

@ -38,7 +38,7 @@ import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineUtils;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.util.*;

View File

@ -26,19 +26,18 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFHeader;
@ -67,9 +66,6 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
// the calculation arguments
private UnifiedGenotyperEngine UG_engine = null;
// all samples to be used
private Set<String> samples = new HashSet<String>();
// mapping from rod name to set of samples coming from it
private Map<String, Set<String>> rodsToSamples = new HashMap<String, Set<String>>();
@ -85,24 +81,11 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
Set<VCFHeaderLine> headerLines = new HashSet<VCFHeaderLine>();
// iterate to get all of the sample names
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
RMDTrack rod = source.getReferenceOrderedData();
// if it's one of our target rods
if ( targetRods.contains(rod.getName()) ) {
// read the samples and store them
VCFReader reader = new VCFReader(rod.getFile());
HashSet<String> mySamples = new HashSet<String>(reader.getHeader().getGenotypeSamples());
rodsToSamples.put(rod.getName(), mySamples);
samples.addAll(mySamples);
// while we're here, pull out the header lines
headerLines.addAll(reader.getHeader().getMetaData());
reader.close();
}
}
// get all of the sample names and meta data
Map<String, VCFHeader> headers = VCFUtils.getVCFHeadersFromRods(getToolkit(), targetRods);
Set<String> samples = SampleUtils.getSampleList(headers);
for ( VCFHeader header : headers.values() )
headerLines.addAll(header.getMetaData());
// update the engine
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, writer, null, null);

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.vcf.*;

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.text.XReadLines;

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.io.File;

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.util.*;

View File

@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;

View File

@ -26,7 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;

View File

@ -41,7 +41,7 @@ import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.util.*;

View File

@ -16,6 +16,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.File;

View File

@ -15,7 +15,7 @@ import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.io.PrintStream;

View File

@ -21,7 +21,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -12,7 +12,7 @@ import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import java.io.File;

View File

@ -42,7 +42,7 @@ import org.broadinstitute.sting.gatk.walkers.RMD;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broad.tribble.vcf.*;

View File

@ -39,7 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.PileupElement;

View File

@ -61,7 +61,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
/**

View File

@ -40,6 +40,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFFilterHeaderLine;

View File

@ -31,7 +31,7 @@ import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import java.util.*;

View File

@ -1,137 +0,0 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.index.Index;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.StingException;
/**
 * The VCFReader class, which given a valid VCF file, parses out the header and VariantContexts.
 *
 * The reader is both an Iterator and an Iterable over the records of the file.  Note that
 * {@link #iterator()} returns this same object, so the records can only be traversed once.
 * Call {@link #close()} when done; the iteration methods must not be used after closing.
 */
public class VCFReader implements Iterator<VariantContext>, Iterable<VariantContext> {

    // our VCF header, taken from the underlying feature source during initialization
    private VCFHeader mHeader;

    // our iterator over the records of the feature source; set to null by close()
    private Iterator<VariantContext> iterator;

    // the underlying feature source that the records and header are read from
    private FeatureSource<VariantContext> vcfReader = null;

    /**
     * Create a VCF reader, given a VCF file.  No line transformer is used and
     * index creation on disk is requested.
     *
     * @param vcfFile the VCF file to read
     */
    public VCFReader(File vcfFile) {
        initialize(vcfFile, null, true);
    }

    /**
     * Create a VCF reader, given a VCF file.  No line transformer is used.
     *
     * @param vcfFile           the VCF file to read
     * @param createIndexOnDisk do we need to create an index on disk?
     */
    public VCFReader(File vcfFile, boolean createIndexOnDisk) {
        initialize(vcfFile, null, createIndexOnDisk);
    }

    /**
     * Create a VCF reader, given a VCF file.  Index creation on disk is requested.
     *
     * @param vcfFile   the VCF file to read
     * @param transform the line transformer to use, if any (may be null)
     */
    public VCFReader(File vcfFile, VCFCodec.LineTransform transform) {
        initialize(vcfFile, transform, true);
    }

    /**
     * Initialize the VCF reader: load the index, open the feature source, and grab
     * the record iterator and the header.
     *
     * @param vcfFile           the VCF file to open
     * @param transform         the line transformer to use, if any
     * @param createIndexOnDisk do we need to create an index on disk?
     * @throws StingException if the index cannot be loaded or the file cannot be read
     */
    private void initialize(File vcfFile, VCFCodec.LineTransform transform, boolean createIndexOnDisk) {
        VCFCodec codec = new VCFCodec();
        Index index = createIndex(vcfFile, createIndexOnDisk);
        if (transform != null) {
            codec.setTransformer(transform);
        }

        try {
            vcfReader = new BasicFeatureSource(vcfFile.getAbsolutePath(), index, codec);
            iterator = vcfReader.iterator();
        } catch (IOException e) {
            // FileNotFoundException is an IOException and was reported with the same message,
            // so a single handler covers both cases
            throw new StingException("Unable to read VCF File from " + vcfFile, e);
        }

        mHeader = (VCFHeader) vcfReader.getHeader();
    }

    /**
     * Load an index for the given file via TribbleRMDTrackBuilder.loadIndex, using a fresh VCFCodec.
     *
     * @param vcfFile           the VCF file
     * @param createIndexOnDisk do we create the index on disk (or only in memory?)
     * @return an instance of an index
     * @throws StingException if loading the index fails with an IOException
     */
    private Index createIndex(File vcfFile, boolean createIndexOnDisk) {
        try {
            return TribbleRMDTrackBuilder.loadIndex(vcfFile, new VCFCodec(), createIndexOnDisk);
        } catch (IOException e) {
            // keep the original IOException as the cause so the underlying failure is not lost
            throw new StingException("Unable to make required index for file " + vcfFile + " do you have write permissions to the directory?", e);
        }
    }

    /** @return true if we have another VCF record to return */
    public boolean hasNext() {
        return iterator.hasNext();
    }

    /**
     * Return the next available VariantContext.  Make sure to check availability with a call to hasNext!
     *
     * @return a VariantContext, representing the next record in the file
     */
    public VariantContext next() {
        return iterator.next();
    }

    /**
     * Remove is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    public void remove() {
        throw new UnsupportedOperationException("Unsupported operation");
    }

    /** @return get the header associated with this reader */
    public VCFHeader getHeader() {
        return this.mHeader;
    }

    /**
     * @return this reader itself; it is not a fresh iterator, so records already consumed
     *         through next() are not replayed
     */
    public Iterator<VariantContext> iterator() {
        return this;
    }

    /**
     * Close the underlying feature source (if one was opened) and drop the record iterator.
     * The header remains available through getHeader() afterwards.
     *
     * @throws StingException if closing the feature source fails
     */
    public void close() {
        if (vcfReader != null) {
            try {
                vcfReader.close();
            } catch (IOException e) {
                throw new StingException("Unable to close vcfReader", e);
            }
        }
        iterator = null;
    }
}

View File

@ -23,16 +23,13 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.genotype.vcf;
package org.broadinstitute.sting.utils.vcf;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.Utils;
import org.apache.log4j.Logger;
import java.util.*;
@ -108,7 +105,6 @@ public class VCFUtils {
public static Set<VCFHeaderLine> smartMergeHeaders(Collection<VCFHeader> headers, Logger logger) throws IllegalStateException {
HashMap<String, VCFHeaderLine> map = new HashMap<String, VCFHeaderLine>(); // from KEY.NAME -> line
HashSet<VCFHeaderLine> lines = new HashSet<VCFHeaderLine>();
// todo -- needs to remove all version headers from sources and add its own VCF version line
for ( VCFHeader source : headers ) {
@ -117,7 +113,7 @@ public class VCFUtils {
String key = line.getKey();
if ( line instanceof VCFNamedHeaderLine)
key = key + "." + ((VCFNamedHeaderLine) line).getName();
key = key + "" + ((VCFNamedHeaderLine) line).getName();
if ( map.containsKey(key) ) {
VCFHeaderLine other = map.get(key);

View File

@ -4,7 +4,7 @@ import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.genotype.vcf.VCFHeaderUnitTest;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import org.junit.Assert;
import org.junit.Test;