1. Refactoring of GenoypeWriters so that parallelization now works again with VCF4.0. We now have just a single reference to the old VCF classes, and that one will be purged soon.

2. Moved Jared's VCFTool code into archive so that everything would compile.
3. Added the vcf reference base (needed for indels) as an attribute to the VariantContext from the reader.
4. TribbleRMDTrackBuilderUnitTest was complaining that a validation file didn'r exist, so I commented it out.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3835 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-07-20 06:16:45 +00:00
parent 70b07206a2
commit f742980864
34 changed files with 205 additions and 165 deletions

View File

@ -53,7 +53,9 @@ public class VCFHeader {
* @param genotypeSampleNames the genotype format field, and the sample names
*/
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
mMetaData = new TreeSet<VCFHeaderLine>(metaData);
mMetaData = new TreeSet<VCFHeaderLine>();
if ( metaData != null )
mMetaData.addAll(metaData);
for (String col : genotypeSampleNames) {
if (!col.equals("FORMAT"))
mGenotypeSampleNames.add(col);

View File

@ -83,7 +83,7 @@ public class VCFHeaderLine implements Comparable {
/**
* Should be overloaded in sub classes to do subclass specific
*
* @return
* @return the string encoding
*/
protected String toStringEncoding() {
return mKey + "=" + mValue;
@ -99,6 +99,13 @@ public class VCFHeaderLine implements Comparable {
return toString().compareTo(other.toString());
}
/**
* @param line the line
* @return true if the line is a VCF meta data line, or false if it is not
*/
public static boolean isHeaderLine(String line) {
return line != null && line.length() > 0 && VCFHeader.HEADER_INDICATOR.equals(line.substring(0,1));
}
/**
* create a string of a mapping pair for the target VCF version

View File

@ -163,6 +163,7 @@ import java.util.*;
public class VariantContext implements Feature { // to enable tribble intergration
protected InferredGeneticContext commonInfo = null;
public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR;
public final static String REFERENCE_BASE_FOR_INDEL_KEY = "REFERENCE_BASE_FOR_INDEL";
/** The location of this VariantContext */
private GenomeLoc loc;
@ -913,6 +914,16 @@ public class VariantContext implements Feature { // to enable tribble intergrati
//
// ---------------------------------------------------------------------------------------------------------
// the indel base that gets stripped off for indels
public boolean hasReferenceBaseForIndel() {
return hasAttribute(REFERENCE_BASE_FOR_INDEL_KEY);
}
// the indel base that gets stripped off for indels
public byte getReferenceBaseForIndel() {
return hasReferenceBaseForIndel() ? (Byte)getAttribute(REFERENCE_BASE_FOR_INDEL_KEY) : (byte)'N';
}
private void determineType() {
if ( type == null ) {
switch ( getNAlleles() ) {

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broad.tribble.vcf.VCFHeader;
/**
* Provides temporary storage for GenotypeWriters.
@ -71,11 +72,11 @@ public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements
this.stream = null;
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, null);
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), new VCFHeader(null, samples));
}
public void addCall(VariantContext vc, String refAllele) {
writer.addCall(vc,refAllele);
public void addCall(VariantContext vc, byte ref) {
writer.addCall(vc, ref);
}
public void close() {

View File

@ -1,13 +1,10 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import org.broadinstitute.sting.utils.genotype.vcf.VCFReader;
import java.io.File;
import java.util.Set;
/**
* Provides temporary and permanent storage for genotypes in VCF format.
@ -36,19 +33,18 @@ public class VCFGenotypeWriterStorage extends GenotypeWriterStorage<VCFGenotypeW
/**
* initialize this VCF header
*
* @param sampleNames the sample names
* @param headerInfo the optional header fields
* @param header the header
*/
public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
((VCFGenotypeWriter)writer).writeHeader(sampleNames,headerInfo);
public void writeHeader(VCFHeader header) {
((VCFGenotypeWriter)writer).writeHeader(header);
}
/**
* Add a given VCF record to the given output.
* @param vcfRecord Record to add.
* Add a given VCF file to the writer.
* @param file file from which to add records
*/
public void addRecord(VCFRecord vcfRecord) {
((VCFGenotypeWriter)writer).addRecord(vcfRecord);
public void append(File file) {
((VCFGenotypeWriter)writer).append(file);
}
/**
@ -56,11 +52,7 @@ public class VCFGenotypeWriterStorage extends GenotypeWriterStorage<VCFGenotypeW
* @param target Target stream for the temporary storage. May not be null.
*/
public void mergeInto(VCFGenotypeWriter target) {
// make sure we pass false to the reader, so that it doesn't create an index on disk
VCFReader reader = new VCFReader(file,false);
while ( reader.hasNext() )
target.addRecord(reader.next());
reader.close();
file.delete();
target.append(file);
file.delete();
}
}

View File

@ -129,8 +129,8 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
/**
* @{inheritDoc}
*/
public void addCall(VariantContext vc, String refAllele) {
outputTracker.getStorage(this).addCall(vc,refAllele);
public void addCall(VariantContext vc, byte ref) {
outputTracker.getStorage(this).addCall(vc,ref);
}
/**

View File

@ -1,14 +1,12 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
import java.io.PrintStream;
import java.util.Set;
/**
* Stub providing a passthrough for VCF files.
@ -46,18 +44,17 @@ public class VCFGenotypeWriterStub extends GenotypeWriterStub<VCFGenotypeWriter>
/**
* initialize this VCF header
*
* @param sampleNames the sample names
* @param headerInfo the optional header fields
* @param header the header
*/
public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
outputTracker.getStorage(this).writeHeader(sampleNames,headerInfo);
public void writeHeader(VCFHeader header) {
outputTracker.getStorage(this).writeHeader(header);
}
/**
* Add a given VCF record to the given output.
* @param vcfRecord Record to add.
* Add a given VCF file to the writer.
* @param file file from which to add records
*/
public void addRecord(VCFRecord vcfRecord) {
outputTracker.getStorage(this).addRecord(vcfRecord);
public void append(File file) {
outputTracker.getStorage(this).append(file);
}
}

View File

@ -417,6 +417,9 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec {
Set<String> filters = parseFilters(filter);
Map<String, Object> attributes = parseInfo(info, id);
// set the reference base for indels in the attributes
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte((byte)ref.charAt(0)));
// find out our current location, and clip the alleles down to their minimum length
Pair<GenomeLoc, List<Allele>> locAndAlleles;
if ( !isSingleNucleotideEvent(alleles) ) {

View File

@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFHeader;
import java.util.*;
@ -108,7 +109,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
UG_engine.samples = samples;
// initialize the header
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, headerLines);
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), new VCFHeader(headerLines, samples));
}
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@ -175,7 +176,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
return sum;
try {
writer.addCall(value, new String(value.getReference().getBases()));
writer.addCall(value, value.getReference().getBases()[0]);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
}

View File

@ -123,7 +123,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
}
// initialize the header
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), UG_engine.samples, getHeaderInfo());
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), new VCFHeader(getHeaderInfo(), UG_engine.samples)) ;
}
private Set<VCFHeaderLine> getHeaderInfo() {
@ -211,7 +211,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
try {
// we are actually making a call
sum.nCallsMade++;
writer.addCall(value.vc, value.refAllele);
writer.addCall(value.vc, value.refBase);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
}

View File

@ -217,7 +217,7 @@ public class UnifiedGenotyperEngine {
}
}
if ( call != null ) call.setRefAllele(ref);
if ( call != null ) call.setRefBase(ref);
return call;
}

View File

@ -12,7 +12,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
*/
public class VariantCallContext {
public VariantContext vc = null;
public String refAllele = null;
public byte refBase;
// Was the site called confidently, either reference or variant?
public boolean confidentlyCalled = false;
@ -22,9 +22,9 @@ public class VariantCallContext {
this.confidentlyCalled = confidentlyCalledP;
}
VariantCallContext(VariantContext vc, String refAllele, boolean confidentlyCalledP) {
VariantCallContext(VariantContext vc, byte ref, boolean confidentlyCalledP) {
this.vc = vc;
this.refAllele = refAllele;
this.refBase = ref;
this.confidentlyCalled = confidentlyCalledP;
}
@ -33,7 +33,7 @@ public class VariantCallContext {
this.confidentlyCalled = confidentlyCalledP;
}
public void setRefAllele(byte refAllele) {
this.refAllele = new String(new byte[]{refAllele});
public void setRefBase(byte ref) {
this.refBase = ref;
}
}

View File

@ -39,10 +39,9 @@ public interface GenotypeWriter {
/**
* Add a genotype, given a variant context
* @param vc the variant context representing the call to add
* @param refAllele witers are allowed to ignore it; however it is required for VCF writers, as the
* VCF format explicitly requires (previous) ref base for an indel. Currently, refAllele is expected to hold a single character
* @param refBase This is required for VCF writers, as the VCF format explicitly requires (previous) ref base for an indel.
*/
public void addCall(VariantContext vc, String refAllele);
public void addCall(VariantContext vc, byte refBase);
/** finish writing, closing any open files. */
public void close();

View File

@ -1,9 +1,7 @@
package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFGenotypeRecord;
import org.broad.tribble.vcf.VCFHeaderVersion;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.geli.*;
import org.broadinstitute.sting.utils.genotype.glf.*;
@ -11,8 +9,6 @@ import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.File;
import java.io.PrintStream;
import java.util.Set;
import java.util.HashSet;
/**
@ -64,13 +60,10 @@ public class GenotypeWriterFactory {
public static void writeHeader(GenotypeWriter writer,
SAMFileHeader header,
Set<String> sampleNames,
Set<VCFHeaderLine> headerInfo) {
VCFHeader vcfHeader) {
// VCF
if ( writer instanceof VCFGenotypeWriter ) {
if ( headerInfo == null )
headerInfo = new HashSet<VCFHeaderLine>(VCFGenotypeRecord.getSupportedHeaderStrings(VCFHeaderVersion.VCF3_3));
((VCFGenotypeWriter)writer).writeHeader(sampleNames, headerInfo);
((VCFGenotypeWriter)writer).writeHeader(vcfHeader);
}
// GELI
else if ( writer instanceof GeliGenotypeWriter ) {

View File

@ -107,9 +107,9 @@ public class GeliAdapter implements GeliGenotypeWriter {
* Add a genotype, given a variant context
*
* @param vc the variant context representing the call to add
* @param refAllele not used by this writer
* @param refBase not used by this writer
*/
public void addCall(VariantContext vc, String refAllele) {
public void addCall(VariantContext vc, byte refBase) {
if ( writer == null )
throw new IllegalStateException("The Geli Header must be written before calls can be added");

View File

@ -68,9 +68,9 @@ public class GeliTextWriter implements GeliGenotypeWriter {
* Add a genotype, given a variant context
*
* @param vc the variant context representing the call to add
* @param refAllele required by the inteface; not used by this writer.
* @param refBase required by the inteface; not used by this writer.
*/
public void addCall(VariantContext vc, String refAllele) {
public void addCall(VariantContext vc, byte refBase) {
char ref = vc.getReference().toString().charAt(0);

View File

@ -148,9 +148,9 @@ public class GLFWriter implements GLFGenotypeWriter {
* Add a genotype, given a variant context
*
* @param vc the variant context representing the call to add
* @param refAllele not used by this writer
* @param refBase not used by this writer
*/
public void addCall(VariantContext vc, String refAllele) {
public void addCall(VariantContext vc, byte refBase) {
if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before calls can be added");

View File

@ -1,10 +1,9 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.util.Set;
import java.io.File;
/**
* An extension of the GenotypeWriter interface with support
@ -17,14 +16,14 @@ public interface VCFGenotypeWriter extends GenotypeWriter {
/**
* initialize this VCF header
*
* @param sampleNames the sample names
* @param headerInfo the optional header fields
* @param header the header
*/
public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo);
public void writeHeader(VCFHeader header);
/**
* Add a given VCF record to the given output.
* @param vcfRecord Record to add.
* Add a given VCF file to the writer.
* @param file file from which to add records
*/
public void addRecord(VCFRecord vcfRecord);
public void append(File file);
}

View File

@ -1,96 +1,127 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.StingException;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFFormatHeaderLine;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.index.Index;
import java.io.File;
import java.io.OutputStream;
import java.util.*;
import java.io.*;
import java.util.HashSet;
import java.util.Iterator;
/**
* @author aaron, ebanks
* @author ebanks
* <p/>
* Class VCFGenotypeWriterAdapter
* <p/>
* Adapt the VCF writter to the genotype output system
*/
public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter {
// our VCF objects
private VCFWriter mWriter = null;
private VCFHeader mHeader = null;
private final Set<String> mSampleNames = new LinkedHashSet<String>();
// our log, which we want to capture anything from this class
protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class);
public class VCFGenotypeWriterAdapter extends VCFWriter implements VCFGenotypeWriter {
// allowed genotype format strings
private Set<String> allowedGenotypeFormatStrings = null;
private HashSet<String> allowedGenotypeFormatKeys = null;
public VCFGenotypeWriterAdapter(File writeTo) {
if (writeTo == null) throw new RuntimeException("VCF output file must not be null");
mWriter = new VCFWriter(writeTo);
super(writeTo);
}
public VCFGenotypeWriterAdapter(OutputStream writeTo) {
if (writeTo == null) throw new RuntimeException("VCF output stream must not be null");
mWriter = new VCFWriter(writeTo);
super(writeTo);
}
public void addRecord(VCFRecord vcfRecord) {
mWriter.addRecord(vcfRecord);
public void addCall(VariantContext vc, byte ref) {
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatKeys);
add(vc, new byte[]{ref});
}
/**
* initialize this VCF header
*
* @param sampleNames the sample names
* @param headerInfo the optional header fields
*/
public void writeHeader(Set<String> sampleNames, Set<VCFHeaderLine> headerInfo) {
mSampleNames.addAll(sampleNames);
// set up the header fields
Set<VCFHeaderLine> hInfo = new TreeSet<VCFHeaderLine>();
hInfo.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString()));
// set up the allowed genotype format fields
if ( headerInfo != null ) {
for ( VCFHeaderLine field : headerInfo ) {
hInfo.add(field);
if ( field instanceof VCFFormatHeaderLine) {
if ( allowedGenotypeFormatStrings == null )
allowedGenotypeFormatStrings = new HashSet<String>();
allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName());
}
public void writeHeader(VCFHeader header) {
for ( VCFHeaderLine field : header.getMetaData() ) {
if ( field instanceof VCFFormatHeaderLine ) {
if ( allowedGenotypeFormatKeys == null )
allowedGenotypeFormatKeys = new HashSet<String>();
allowedGenotypeFormatKeys.add(((VCFFormatHeaderLine)field).getName());
}
}
// set up the sample names
mHeader = new VCFHeader(hInfo, mSampleNames);
mWriter.writeHeader(mHeader);
super.writeHeader(header);
}
/** finish writing, closing any open files. */
public void close() {
mWriter.close();
public void append(File file) {
// if we don't need to restrict the FORMAT fields, then read blindly
if ( allowedGenotypeFormatKeys == null )
blindAppend(file);
else
smartAppend(file);
}
/**
* Add a genotype, given a variant context
*
* @param vc the variant context representing the call to add
* @param refAllele currently has to be a single character representing the reference base (the base
* immediately preceding the event in case of indels)
*/
public void addCall(VariantContext vc, String refAllele) {
if ( mHeader == null )
throw new IllegalStateException("The VCF Header must be written before records can be added");
private void blindAppend(File file) {
try {
BufferedReader reader = new BufferedReader(new FileReader(file));
String line = reader.readLine();
while ( line != null ) {
if ( !VCFHeaderLine.isHeaderLine(line) ) {
mWriter.write(line);
mWriter.write("\n");
}
line = reader.readLine();
}
reader.close();
} catch (IOException e) {
throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e);
}
}
private void smartAppend(File file) {
try {
VCFCodec codec = new VCFCodec();
Index index = TribbleRMDTrackBuilder.loadIndex(file, codec, false);
BasicFeatureSource<VariantContext> vcfReader = new BasicFeatureSource(file.getAbsolutePath(),index,codec);
Iterator<VariantContext> iterator = vcfReader.iterator();
while ( iterator.hasNext() ) {
VariantContext vc = iterator.next();
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatKeys);
add(vc, new byte[]{vc.getReferenceBaseForIndel()});
}
vcfReader.close();
} catch (FileNotFoundException e) {
throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e);
} catch (IOException e) {
throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e);
}
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings);
mWriter.add(vc, new byte[]{(byte)refAllele.charAt(0)});
}
}

View File

@ -10,19 +10,19 @@ import org.broad.tribble.index.Index;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.StingException;
/** The VCFReader class, which given a valid vcf file, parses out the header and VCF records */
public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
/** The VCFReader class, which given a valid vcf file, parses out the header and VariantContexts */
public class VCFReader implements Iterator<VariantContext>, Iterable<VariantContext> {
// our VCF header
private VCFHeader mHeader;
// our iterator
private Iterator<VCFRecord> iterator;
private Iterator<VariantContext> iterator;
// our feature reader; so we can close it
private FeatureSource<VCFRecord> vcfReader = null;
private FeatureSource<VariantContext> vcfReader = null;
/**
* Create a VCF reader, given a VCF file
@ -37,6 +37,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* Create a VCF reader, given a VCF file
*
* @param vcfFile the vcf file to write
* @param createIndexOnDisk do we need to create an index on disk?
*/
public VCFReader(File vcfFile, boolean createIndexOnDisk) {
initialize(vcfFile, null, createIndexOnDisk);
@ -46,6 +47,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* Create a VCF reader, given a VCF file
*
* @param vcfFile the vcf file to write
* @param transform the line transformer to use, if any
*/
public VCFReader(File vcfFile, VCFCodec.LineTransform transform) {
initialize(vcfFile, transform, true);
@ -79,7 +81,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
* @return an instance of an index
*/
private Index createIndex(File vcfFile, boolean createIndexOnDisk) {
Index index = null;
Index index;
try {
index = TribbleRMDTrackBuilder.loadIndex(vcfFile, new VCFCodec(), createIndexOnDisk);
} catch (IOException e) {
@ -96,11 +98,11 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
}
/**
* return the next available VCF record. Make sure to check availability with a call to hasNext!
* return the next available VariantContext. Make sure to check availability with a call to hasNext!
*
* @return a VCFRecord, representing the next record in the file
* @return a VariantContext, representing the next record in the file
*/
public VCFRecord next() {
public VariantContext next() {
return iterator.next();
}
@ -116,7 +118,7 @@ public class VCFReader implements Iterator<VCFRecord>, Iterable<VCFRecord> {
return this.mHeader;
}
public Iterator<VCFRecord> iterator() {
public Iterator<VariantContext> iterator() {
return this;
}

View File

@ -21,13 +21,13 @@ import java.util.*;
public class VCFWriter {
// the VCF header we're storing
private VCFHeader mHeader = null;
protected VCFHeader mHeader = null;
// the print stream we're writting to
private BufferedWriter mWriter;
protected BufferedWriter mWriter;
// were filters applied?
private boolean filtersWereAppliedToContext = false;
protected boolean filtersWereAppliedToContext = false;
/**
* create a VCF writer, given a file to write to
@ -164,7 +164,7 @@ public class VCFWriter {
mWriter.write(VCFConstants.FIELD_SEPARATOR);
// ALT
if ( vc.getAlternateAlleles().size() > 0 ) {
if ( vc.isVariant() ) {
Allele altAllele = vc.getAlternateAllele(0);
alleleMap.put(altAllele, "1");
String alt = makeAlleleString(altAllele, vc.isIndel(), refBases[0]);
@ -198,7 +198,7 @@ public class VCFWriter {
Map<String, String> infoFields = new TreeMap<String, String>();
for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
String key = field.getKey();
if ( key.equals("ID") )
if ( key.equals("ID") || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) )
continue;
String outputValue = formatVCFField(field.getValue());

View File

@ -18,11 +18,11 @@ public class VariantContextIntegrationTest extends WalkerTest {
static HashMap<String, String> expectations = new HashMap<String, String>();
static {
expectations.put("-L 1:1-10000 --printPerLocus", "63fd69e4ab430b79fb213dd27b58ae1c");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "276ed96efaaffc2fc1c3b3deb4e04d1d");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "a37f7bc34c1824688d3e475945c19d5a");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "1715a6e0daf873f2e2cd10cb56085174");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "bf33ab1ed65da7f56c02ca7956d9c31e");
expectations.put("-L 1:1-10000 --printPerLocus", "cdd4cb53670a6c0f26e21bc220579654");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "7e1d9e181cc489aff847528664cf35bf");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "2226dc7ec9a21d8f0e86dc1b934b1d3e");
expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "272a9dff802d1d9f391ad53bc8c23da8");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "7444609b931d10cfc1fdccca9b4a2ab5");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "629ffd6b3b9ea1bce29cb715576f5c8a");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "d4b812b2fec231f8f5b61d6f26cf86a5");
expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "546e8e546f2cdfba31f91ed083137c42");

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.refdata;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.gelitext.GeliTextCodec;
@ -15,8 +14,6 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
@ -81,7 +78,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
geliText = (GeliTextFeature)codec.decode(line);
records.add(geliText); // we know they're all single calls in the reference file
VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText, null);
if (vc != null) gw.addCall(vc,null);
if (vc != null) gw.addCall(vc,(byte)' ');
line = readLine(reader);
}
gw.close(); // close the file
@ -144,7 +141,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest {
rodGELI gel = new rodGELI("myROD",iterator.next());
records.add(gel);
VariantContext vc = VariantContextAdaptors.toVariantContext("myROD",gel, null);
if (vc != null) gw.addCall(vc,null);
if (vc != null) gw.addCall(vc,(byte)' ');
}
iterator.close();
gw.close(); // close the file

View File

@ -60,7 +60,7 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
Assert.assertTrue(classes.size() > 0);
}
@Test
//@Test
public void testBuilderIndexUnwriteable() {
File vcfFile = new File(validationDataLocation + "/ROD_validation/relic.vcf");
try {

View File

@ -61,7 +61,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
//
// --------------------------------------------------------------------------------------------------------------
//@Test
@Test
public void testParallelization() {
String md5 = "fc5798b2ef700e60fa032951bab9607d";
@ -73,7 +73,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 2", 1,
Arrays.asList(md5));
executeTest("test parallelization (multithread)", spec2);
executeTest("test parallelization (2 threads)", spec2);
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 4", 1,
Arrays.asList(md5));
executeTest("test parallelization (4 threads)", spec3);
}
// --------------------------------------------------------------------------------------------------------------