Unit, integration, and performance tests are all busted, so this is a good time to make a big commit...

Major cleanup of the genotype writer code from the calling end.  UG no longer supports making calls in anything but VCF, and that allows us to use the VCFWriter more generically now.  Putting the ball in Matt's court to finish collapsing everything.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3996 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-08-10 04:18:29 +00:00
parent 0f78f70ed4
commit ca5b274f16
28 changed files with 128 additions and 1212 deletions

View File

@ -1,67 +0,0 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.glf.GLFReader;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
/**
* Provides temporary and permanent storage for genotypes in GLF format.
*
* @author mhanna
* @version 0.1
*/
public class GLFGenotypeWriterStorage extends GenotypeWriterStorage<GLFGenotypeWriter> implements GLFGenotypeWriter {
/**
* Creates new (permanent) storage for GLF genotype writers.
* @param stub Stub containing appropriate input parameters.
*/
public GLFGenotypeWriterStorage(GenotypeWriterStub stub) {
super(stub);
}
/**
* Creates new (temporary) storage for GLF genotype writers.
* @param stub Stub containing appropriate input parameters.
* @param target Target file for output data.
*/
public GLFGenotypeWriterStorage(GenotypeWriterStub stub, File target) {
super(stub,target);
}
/**
* Write the geli header to the target file.
* @param headerText The header to write.
*/
public void writeHeader(String headerText) {
((GLFGenotypeWriter)writer).writeHeader(headerText);
}
/**
* add a GLF record to the output file
*
* @param contigName the contig name
* @param contigLength the contig length
* @param rec the GLF record to write.
*/
public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) {
((GLFGenotypeWriter)writer).addGLFRecord(contigName,contigLength,rec);
}
/**
* Merges the stream backing up this temporary storage into the target.
* @param target Target stream for the temporary storage. May not be null.
*/
public void mergeInto(GLFGenotypeWriter target) {
GLFReader reader = new GLFReader(file);
while ( reader.hasNext() ) {
GLFRecord rec = reader.next();
target.addGLFRecord(rec.getContig(),(int)rec.getPosition(),rec);
}
reader.close();
file.delete();
}
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
/**
* Provides temporary and permanent storage for genotypes in Geli binary format.
*
* @author mhanna
* @version 0.1
*/
public class GeliBinaryGenotypeWriterStorage extends GenotypeWriterStorage<GeliGenotypeWriter> implements GeliGenotypeWriter {
/**
* Creates new (permanent) storage for geli binary genotype writers.
* @param stub Stub containing appropriate input parameters.
*/
public GeliBinaryGenotypeWriterStorage(GenotypeWriterStub stub) {
super(stub);
}
/**
* Creates new (temporary) storage for geli binary genotype writers.
* @param stub Stub containing appropriate input parameters.
* @param target Target file for output data.
*/
public GeliBinaryGenotypeWriterStorage(GenotypeWriterStub stub, File target) {
super(stub,target);
}
/**
* Write the geli header to the target file.
* @param header The header to write.
*/
public void writeHeader(SAMFileHeader header) {
((GeliGenotypeWriter)writer).writeHeader(header);
}
/**
* Writes the genotype likelihoods to the output.
* @param gl genotype likelihoods to write.
*/
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
((GeliGenotypeWriter)writer).addGenotypeLikelihoods(gl);
}
/**
* Merges the stream backing up this temporary storage into the target.
* @param target Target stream for the temporary storage. May not be null.
*/
public void mergeInto(GeliGenotypeWriter target) {
GeliFileReader reader = new GeliFileReader(file);
while ( reader.hasNext() )
target.addGenotypeLikelihoods(reader.next());
reader.close();
file.delete();
}
}

View File

@ -1,64 +0,0 @@
package org.broadinstitute.sting.gatk.io.storage;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
/**
* Provides temporary and permanent storage for genotypes in Geli text format.
*
* @author mhanna
* @version 0.1
*/
public class GeliTextGenotypeWriterStorage extends GenotypeWriterStorage<GeliGenotypeWriter> implements GeliGenotypeWriter {
/**
* Creates new (permanent) storage for geli text genotype writers.
* @param stub Stub containing appropriate input parameters.
*/
public GeliTextGenotypeWriterStorage(GenotypeWriterStub stub) {
super(stub);
}
/**
* Creates new (temporary) storage for geli text genotype writers.
* @param stub Stub containing appropriate input parameters.
* @param target Target file for output data.
*/
public GeliTextGenotypeWriterStorage(GenotypeWriterStub stub, File target) {
super(stub,target);
}
/**
* Write the geli header to the target file.
* @param header The header to write.
*/
public void writeHeader(SAMFileHeader header) {
((GeliGenotypeWriter)writer).writeHeader(header);
}
/**
* Writes the genotype likelihoods to the output.
* @param gl genotype likelihoods to write.
*/
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
((GeliGenotypeWriter)writer).addGenotypeLikelihoods(gl);
}
/**
* Merges the stream backing up this temporary storage into the target.
* @param target Target stream for the temporary storage. May not be null.
*/
public void mergeInto(GeliGenotypeWriter target) {
GeliFileReader reader = new GeliFileReader(file);
while ( reader.hasNext() )
target.addGenotypeLikelihoods(reader.next());
reader.close();
file.delete();
}
}

View File

@ -55,9 +55,9 @@ public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements
this.file = stub.getFile();
this.stream = stub.getOutputStream();
if(file != null)
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
writer = GenotypeWriterFactory.create(file);
else if(stream != null)
writer = GenotypeWriterFactory.create(stub.getFormat(), stream);
writer = GenotypeWriterFactory.create(stream);
else
throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
}
@ -70,13 +70,13 @@ public abstract class GenotypeWriterStorage<T extends GenotypeWriter> implements
public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) {
this.file = file;
this.stream = null;
writer = GenotypeWriterFactory.create(stub.getFormat(), file);
writer = GenotypeWriterFactory.create(file);
Set<String> samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader());
GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), new VCFHeader(null, samples));
GenotypeWriterFactory.writeHeader(writer, new VCFHeader(null, samples));
}
public void addCall(VariantContext vc, byte ref) {
writer.addCall(vc, ref);
public void add(VariantContext vc, byte ref) {
writer.add(vc, ref);
}
public void close() {

View File

@ -63,7 +63,7 @@ public class StorageFactory {
* @return Storage object with a facade of type T.
*/
public static <T> Storage<T> createStorage( Stub<T> stub, File file ) {
Storage storage = null;
Storage storage;
if(stub instanceof OutputStreamStub) {
if( file != null )
@ -79,42 +79,10 @@ public class StorageFactory {
}
else if(stub instanceof GenotypeWriterStub) {
GenotypeWriterStub genotypeWriterStub = (GenotypeWriterStub)stub;
if( file != null ) {
switch(genotypeWriterStub.getFormat()) {
case GELI:
storage = new GeliTextGenotypeWriterStorage(genotypeWriterStub,file);
break;
case GELI_BINARY:
storage = new GeliBinaryGenotypeWriterStorage(genotypeWriterStub,file);
break;
case GLF:
storage = new GLFGenotypeWriterStorage(genotypeWriterStub,file);
break;
case VCF:
storage = new VCFGenotypeWriterStorage(genotypeWriterStub,file);
break;
default:
throw new StingException("Unsupported genotype file format: " + genotypeWriterStub.getFormat());
}
}
else {
switch(genotypeWriterStub.getFormat()) {
case GELI:
storage = new GeliTextGenotypeWriterStorage(genotypeWriterStub);
break;
case GELI_BINARY:
storage = new GeliBinaryGenotypeWriterStorage(genotypeWriterStub);
break;
case GLF:
storage = new GLFGenotypeWriterStorage(genotypeWriterStub);
break;
case VCF:
storage = new VCFGenotypeWriterStorage(genotypeWriterStub);
break;
default:
throw new StingException("Unsupported genotype file format: " + genotypeWriterStub.getFormat());
}
}
if( file != null )
storage = new VCFGenotypeWriterStorage(genotypeWriterStub,file);
else
storage = new VCFGenotypeWriterStorage(genotypeWriterStub);
}
else
throw new StingException("Unsupported stub type: " + stub.getClass().getName());

View File

@ -1,63 +0,0 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
import java.io.PrintStream;
/**
* Stub providing a passthrough for GLF files.
*
* @author mhanna
* @version 0.1
*/
public class GLFGenotypeWriterStub extends GenotypeWriterStub<GLFGenotypeWriter> implements GLFGenotypeWriter {
/**
* Construct a new stub with the given engine and target file.
* @param engine The engine, for extracting command-line arguments, etc.
* @param genotypeFile Target file into which to write genotyping data.
*/
public GLFGenotypeWriterStub(GenomeAnalysisEngine engine, File genotypeFile) {
super(engine,genotypeFile);
}
/**
* Construct a new stub with the given engine and target stream.
* @param engine The engine, for extracting command-line arguments, etc.
* @param genotypeStream Target stream into which to write genotyping data.
*/
public GLFGenotypeWriterStub(GenomeAnalysisEngine engine, PrintStream genotypeStream) {
super(engine,genotypeStream);
}
/**
* Gets the format of this stub. We may want to discontinue use of this method and rely on instanceof comparisons.
* @return GLF always.
*/
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
}
/**
* Write the GLF header to the target file.
* @param headerText The header to write.
*/
public void writeHeader(String headerText) {
outputTracker.getStorage(this).writeHeader(headerText);
}
/**
* add a GLF record to the output file
*
* @param contigName the contig name
* @param contigLength the contig length
* @param rec the GLF record to write.
*/
public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) {
outputTracker.getStorage(this).addGLFRecord(contigName, contigLength, rec);
}
}

View File

@ -1,51 +0,0 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import net.sf.samtools.SAMFileHeader;
import java.io.File;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
* Stub providing a passthrough for geli binary files.
*
* @author mhanna
* @version 0.1
*/
public class GeliBinaryGenotypeWriterStub extends GenotypeWriterStub<GeliGenotypeWriter> implements GeliGenotypeWriter {
/**
* Construct a new stub with the given engine and target file.
* @param engine The engine, for extracting command-line arguments, etc.
* @param genotypeFile Target file into which to write genotyping data.
*/
public GeliBinaryGenotypeWriterStub(GenomeAnalysisEngine engine,File genotypeFile) {
super(engine,genotypeFile);
}
/**
* Gets the format of this stub. We may want to discontinue use of this method and rely on instanceof comparisons.
* @return GELI_BINARY always.
*/
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY;
}
/**
* Write the geli header to the target file.
* @param header The header to write.
*/
public void writeHeader(SAMFileHeader header) {
outputTracker.getStorage(this).writeHeader(header);
}
/**
* Writes the genotype likelihoods to the output.
* @param gl genotype likelihoods to write.
*/
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
outputTracker.getStorage(this).addGenotypeLikelihoods(gl);
}
}

View File

@ -1,61 +0,0 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import java.io.File;
import java.io.PrintStream;
import net.sf.samtools.SAMFileHeader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
/**
* Stub providing a passthrough for geli text files.
*
* @author mhanna
* @version 0.1
*/
public class GeliTextGenotypeWriterStub extends GenotypeWriterStub<GeliGenotypeWriter> implements GeliGenotypeWriter {
/**
* Construct a new stub with the given engine and target file.
* @param engine The engine, for extracting command-line arguments, etc.
* @param genotypeFile Target file into which to write genotyping data.
*/
public GeliTextGenotypeWriterStub(GenomeAnalysisEngine engine, File genotypeFile) {
super(engine,genotypeFile);
}
/**
* Construct a new stub with the given engine and target stream.
* @param engine The engine, for extracting command-line arguments, etc.
* @param genotypeStream Target stream into which to write genotyping data.
*/
public GeliTextGenotypeWriterStub(GenomeAnalysisEngine engine, PrintStream genotypeStream) {
super(engine,genotypeStream);
}
/**
* Gets the format of this stub. We may want to discontinue use of this method and rely on instanceof comparisons.
* @return GELI always.
*/
public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() {
return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
}
/**
* Write the geli header to the target file.
* @param header The header to write.
*/
public void writeHeader(SAMFileHeader header) {
outputTracker.getStorage(this).writeHeader(header);
}
/**
* Writes the genotype likelihoods to the output.
* @param gl genotype likelihoods to write.
*/
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
outputTracker.getStorage(this).addGenotypeLikelihoods(gl);
}
}

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.io.stubs;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -111,38 +110,8 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
String writerFileName = getArgumentValue(createGenotypeFileArgumentDefinition(source),matches);
File writerFile = writerFileName != null ? new File(writerFileName) : null;
// Get the format of the genotype object, if it exists.
String genotypeFormatText = getArgumentValue(createGenotypeFormatArgumentDefinition(source),matches);
GenotypeWriterFactory.GENOTYPE_FORMAT genotypeFormat = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if(genotypeFormatText != null) {
try {
genotypeFormat = Enum.valueOf(GenotypeWriterFactory.GENOTYPE_FORMAT.class,genotypeFormatText);
}
catch(IllegalArgumentException ex) {
throw new StingException(String.format("Genotype format %s is invalid.",genotypeFormatText));
}
}
// Create a stub for the given object.
GenotypeWriterStub stub;
switch(genotypeFormat) {
case GELI:
stub = (writerFile != null) ? new GeliTextGenotypeWriterStub(engine, writerFile) : new GeliTextGenotypeWriterStub(engine,System.out);
break;
case GELI_BINARY:
if(writerFile == null)
throw new StingException("Geli binary files cannot be output to the console.");
stub = new GeliBinaryGenotypeWriterStub(engine, writerFile);
break;
case GLF:
stub = (writerFile != null) ? new GLFGenotypeWriterStub(engine, writerFile) : new GLFGenotypeWriterStub(engine,System.out);
break;
case VCF:
stub = (writerFile != null) ? new VCFGenotypeWriterStub(engine, writerFile) : new VCFGenotypeWriterStub(engine,System.out);
break;
default:
throw new StingException("Unable to create stub for file format " + genotypeFormat);
}
GenotypeWriterStub stub = (writerFile != null) ? new VCFGenotypeWriterStub(engine, writerFile) : new VCFGenotypeWriterStub(engine,System.out);
engine.addOutput(stub);

View File

@ -129,8 +129,8 @@ public abstract class GenotypeWriterStub<T extends GenotypeWriter> implements St
/**
* @{inheritDoc}
*/
public void addCall(VariantContext vc, byte ref) {
outputTracker.getStorage(this).addCall(vc,ref);
public void add(VariantContext vc, byte ref) {
outputTracker.getStorage(this).add(vc,ref);
}
/**

View File

@ -14,8 +14,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import java.util.*;
@ -279,9 +277,9 @@ public class VariantContextAdaptors {
MutableGenotype call = new MutableGenotype(name, genotypeAlleles);
// set the likelihoods, depth, and RMS mapping quality values
call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.getLikelihoods());
call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaximumMappingQual());
call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.getDepthOfCoverage());
//call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.getLikelihoods());
//call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaximumMappingQual());
//call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.getDepthOfCoverage());
// add the call to the genotype list, and then use this list to create a VariantContext
genotypes.add(call);
@ -353,9 +351,9 @@ public class VariantContextAdaptors {
post[x] = Double.valueOf(geli.getLikelihood(DiploidGenotype.fromBases((byte) gTypes[x].charAt(0), (byte) gTypes[x].charAt(1))));
// set the likelihoods, depth, and RMS mapping quality values
call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY, post);
call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY, (int) geli.getMaxMappingQuality());
call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY, geli.getNumReads());
//call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY, post);
//call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY, (int) geli.getMaxMappingQuality());
//call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY, geli.getNumReads());
// add the call to the genotype list, and then use this list to create a VariantContext
genotypes.add(call);

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -92,7 +91,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
UG_engine.samples = samples;
// initialize the header
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), new VCFHeader(headerLines, samples));
GenotypeWriterFactory.writeHeader(writer, new VCFHeader(headerLines, samples));
}
public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@ -159,7 +158,7 @@ public class BatchedCallsMerger extends LocusWalker<VariantContext, Integer> imp
return sum;
try {
writer.addCall(value, value.getReference().getBases()[0]);
writer.add(value, value.getReference().getBases()[0]);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
}

View File

@ -31,7 +31,6 @@ import org.broad.tribble.vcf.VCFConstants;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
@ -150,20 +149,17 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
myAlleles.add(altAllele);
}
CalledGenotype cg = new CalledGenotype(sample, myAlleles, AFbasedGenotype.second);
cg.setLikelihoods(GLs.get(sample).getLikelihoods());
cg.setReadBackedPileup(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup());
cg.putAttribute(VCFConstants.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
HashMap<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size());
cg.setPosteriors(GLs.get(sample).getPosteriors());
double[] likelihoods = GLs.get(sample).getLikelihoods();
String GL = String.format("%.2f,%.2f,%.2f",
likelihoods[refGenotype.ordinal()],
likelihoods[hetGenotype.ordinal()],
likelihoods[homGenotype.ordinal()]);
cg.putAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, GL);
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, GL);
calls.put(sample, cg);
calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false));
}
return calls;

View File

@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import java.io.PrintStream;
import java.util.Map;
@ -29,7 +28,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected UnifiedArgumentCollection UAC;
protected Set<String> samples;
protected Logger logger;
protected GenotypeWriterFactory.GENOTYPE_FORMAT OUTPUT_FORMAT;
protected PrintStream verboseWriter;
/**
@ -44,17 +42,14 @@ public abstract class GenotypeCalculationModel implements Cloneable {
* @param samples samples in input bam
* @param logger logger
* @param UAC unified arg collection
* @param outputFormat output format
* @param verboseWriter verbose writer
*/
protected void initialize(Set<String> samples,
Logger logger,
UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
PrintStream verboseWriter) {
this.UAC = UAC.clone();
this.samples = new TreeSet<String>(samples);
OUTPUT_FORMAT = outputFormat;
this.logger = logger;
this.verboseWriter = verboseWriter;
}

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import static org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model.*;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.apache.log4j.Logger;
import java.util.Set;
@ -34,7 +33,6 @@ import java.io.PrintStream;
public class GenotypeCalculationModelFactory {
//private GenotypeCalculationModelFactory() {} // cannot be instantiated
public static GenotypeCalculationModel.Model getGenotypeCalculationModel(final String name) {
return valueOf(name);
@ -46,7 +44,6 @@ public class GenotypeCalculationModelFactory {
* @param samples samples in bam
* @param logger logger
* @param UAC the unified argument collection
* @param outputFormat the output format
* @param verboseWriter verbose writer
*
* @return model
@ -54,7 +51,6 @@ public class GenotypeCalculationModelFactory {
public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples,
Logger logger,
UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
PrintStream verboseWriter) {
GenotypeCalculationModel gcm;
switch ( UAC.genotypeModel ) {
@ -63,16 +59,13 @@ public class GenotypeCalculationModelFactory {
boolean useExptGenotypeLikelihoods = UAC.genotypeModel == JOINT_ESTIMATE_EXPT_GL;
gcm = new DiploidGenotypeCalculationModel(useExptGenotypeLikelihoods);
break;
// case POOLED:
// gcm = new PooledCalculationModel();
// break;
case INDELS:
gcm = new SimpleIndelCalculationModel();
break;
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
}
gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter);
gcm.initialize(samples, logger, UAC, verboseWriter);
return gcm;
}
}

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broad.tribble.vcf.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
@ -127,7 +126,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
}
// initialize the header
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), new VCFHeader(getHeaderInfo(), UG_engine.samples)) ;
GenotypeWriterFactory.writeHeader(writer, new VCFHeader(getHeaderInfo(), UG_engine.samples)) ;
}
private Set<VCFHeaderLine> getHeaderInfo() {
@ -207,7 +206,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
try {
// we are actually making a call
sum.nCallsMade++;
writer.addCall(value.vc, value.refBase);
writer.add(value.vc, value.refBase);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name");
}

View File

@ -39,12 +39,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter;
import org.broadinstitute.sting.utils.pileup.*;
import org.broad.tribble.vcf.VCFConstants;
@ -94,12 +89,9 @@ public class UnifiedGenotyperEngine {
this.annotationEngine = engine;
// deal with input errors
if ( UAC.genotypeModel == GenotypeCalculationModel.Model.INDELS && !(genotypeWriter instanceof VCFGenotypeWriter) ) {
throw new IllegalArgumentException("Attempting to use an output format other than VCF with indels. Please set the output format to VCF.");
}
if ( toolkit.getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
// the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads; please run again without the -nt argument");
}
// get all of the unique sample names
@ -133,18 +125,7 @@ public class UnifiedGenotyperEngine {
// initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
if ( gcm.get() == null ) {
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if ( genotypeWriter != null ) {
if ( genotypeWriter instanceof VCFGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if ( genotypeWriter instanceof GLFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if ( genotypeWriter instanceof GeliGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + genotypeWriter.getClass().getName());
}
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter));
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, verboseWriter));
}
byte ref = refContext.getBase();

View File

@ -1,48 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.MutableGenotype;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.util.*;
/**
* This class emcompasses all the basic information about a called genotype.
*
* @author ebanks
*/
public class CalledGenotype extends MutableGenotype {
// key names for standard genotype attributes
public static final String LIKELIHOODS_ATTRIBUTE_KEY = "Likelihoods";
public static final String POSTERIORS_ATTRIBUTE_KEY = "Posteriors";
public static final String READBACKEDPILEUP_ATTRIBUTE_KEY = "ReadBackedPileup";
public CalledGenotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean genotypesArePhased) {
super(sampleName, alleles, negLog10PError, filters, attributes, genotypesArePhased);
}
public CalledGenotype(String sampleName, List<Allele> alleles, double negLog10PError) {
super(sampleName, alleles, negLog10PError);
}
public CalledGenotype(String sampleName, List<Allele> alleles) {
super(sampleName, alleles, NO_NEG_LOG_10PERROR, null, null, false);
}
// ---------------------------------------------------------------------------------------------------------
//
// routines to modify useful attribute fields
//
// ---------------------------------------------------------------------------------------------------------
public void setLikelihoods(double likelihoods[]) { putAttribute(LIKELIHOODS_ATTRIBUTE_KEY, likelihoods); }
public void setPosteriors(double posteriors[]) { putAttribute(POSTERIORS_ATTRIBUTE_KEY, posteriors); }
public void setReadBackedPileup(ReadBackedPileup pileup) { putAttribute(READBACKEDPILEUP_ATTRIBUTE_KEY, pileup); }
public double[] getLikelihoods() { return (double[])getAttribute(LIKELIHOODS_ATTRIBUTE_KEY); }
public double[] getPosteriors() { return (double[])getAttribute(POSTERIORS_ATTRIBUTE_KEY); }
public ReadBackedPileup getReadBackedPileup() { return (ReadBackedPileup)getAttribute(READBACKEDPILEUP_ATTRIBUTE_KEY); }
}

View File

@ -37,11 +37,11 @@ import org.broad.tribble.util.variantcontext.VariantContext;
*/
public interface GenotypeWriter {
/**
* Add a genotype, given a variant context
* Add a record, given a variant context, with the genotype fields restricted to what is defined in the header
* @param vc the variant context representing the call to add
* @param refBase This is required for VCF writers, as the VCF format explicitly requires (previous) ref base for an indel.
*/
public void addCall(VariantContext vc, byte refBase);
public void add(VariantContext vc, byte refBase);
/** finish writing, closing any open files. */
public void close();

View File

@ -1,10 +1,7 @@
package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.geli.*;
import org.broadinstitute.sting.utils.genotype.glf.*;
import org.broadinstitute.sting.utils.vcf.GATKVCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.io.File;
@ -26,53 +23,18 @@ public class GenotypeWriterFactory {
/**
* create a genotype writer
* @param format the format
* @param destination the destination file
* @return the genotype writer object
*/
public static GenotypeWriter create(GENOTYPE_FORMAT format, File destination) {
switch (format) {
case GLF:
return new GLFWriter(destination);
case GELI:
return new GeliTextWriter(destination);
case GELI_BINARY:
return new GeliAdapter(destination);
case VCF:
return new VCFGenotypeWriterAdapter(destination);
default:
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
}
public static GenotypeWriter create(File destination) {
return new GATKVCFWriter(destination);
}
public static GenotypeWriter create(GENOTYPE_FORMAT format, PrintStream destination) {
switch (format) {
case GELI:
return new GeliTextWriter(destination);
case GLF:
return new GLFWriter(destination);
case VCF:
return new VCFGenotypeWriterAdapter(destination);
default:
throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented");
}
public static GenotypeWriter create(PrintStream destination) {
return new GATKVCFWriter(destination);
}
public static void writeHeader(GenotypeWriter writer,
SAMFileHeader header,
VCFHeader vcfHeader) {
// VCF
if ( writer instanceof VCFGenotypeWriter ) {
((VCFGenotypeWriter)writer).writeHeader(vcfHeader);
}
// GELI
else if ( writer instanceof GeliGenotypeWriter ) {
((GeliGenotypeWriter)writer).writeHeader(header);
}
// GLF
else if ( writer instanceof GLFGenotypeWriter ) {
((GLFGenotypeWriter)writer).writeHeader(header.toString());
}
// nothing to do for GELI TEXT
public static void writeHeader(GenotypeWriter writer, VCFHeader vcfHeader) {
((VCFGenotypeWriter)writer).writeHeader(vcfHeader);
}
}

View File

@ -3,14 +3,7 @@ package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import java.io.File;
@ -73,30 +66,6 @@ public class GeliAdapter implements GeliGenotypeWriter {
this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader);
}
/**
* add a single point genotype call to the genotype likelihood file
*
* @param contig the contig you're calling in
* @param position the position on the contig
* @param referenceBase the reference base
* @param maxMappingQuality the max MQ
* @param readCount the read count
* @param likelihoods the likelihoods of each of the possible alleles
*/
private void addCall(SAMSequenceRecord contig,
int position,
char referenceBase,
double maxMappingQuality,
int readCount,
LikelihoodObject likelihoods) {
GenotypeLikelihoods lk = likelihoods.convertToGenotypeLikelihoods(writer.getFileHeader(), contig.getSequenceIndex(), position, (byte) referenceBase);
lk.setNumReads(readCount);
lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? Short.MAX_VALUE : (short)Math.round(maxMappingQuality));
writer.addGenotypeLikelihoods(lk);
}
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {
if ( writer == null )
throw new IllegalStateException("The Geli Header must be written before records can be added");
@ -110,53 +79,8 @@ public class GeliAdapter implements GeliGenotypeWriter {
* @param vc the variant context representing the call to add
* @param refBase not used by this writer
*/
public void addCall(VariantContext vc, byte refBase) {
if ( writer == null )
throw new IllegalStateException("The Geli Header must be written before calls can be added");
char ref = vc.getReference().toString().charAt(0);
if ( vc.getNSamples() != 1 )
throw new IllegalArgumentException("The Geli format does not support multi-sample or no-calls");
Genotype genotype = vc.getGenotypes().values().iterator().next();
if ( genotype.isNoCall() )
throw new IllegalArgumentException("The Geli format does not support no-calls");
ReadBackedPileup pileup;
double[] posteriors;
if ( genotype instanceof CalledGenotype ) {
pileup = ((CalledGenotype)genotype).getReadBackedPileup();
posteriors = ((CalledGenotype)genotype).getPosteriors();
} else {
pileup = (ReadBackedPileup)genotype.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY);
posteriors = (double[])genotype.getAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY);
}
if ( posteriors == null )
throw new IllegalArgumentException("The Geli format requires posteriors");
int readCount = 0;
double maxMappingQual = 0;
if ( pileup != null ) {
readCount = pileup.size();
for (PileupElement p : pileup ) {
if ( maxMappingQual < p.getMappingQual() )
maxMappingQual = p.getMappingQual();
}
} else {
if (genotype.hasAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY))
readCount = genotype.getAttributeAsInt(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY);
if (genotype.hasAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY))
maxMappingQual = Double.valueOf(genotype.getAttributeAsInt(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY));
}
LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
addCall(GenomeLocParser.getContigInfo(vc.getChr()),
vc.getStart(),
ref,
maxMappingQual,
readCount,
obj);
public void add(VariantContext vc, byte refBase) {
throw new UnsupportedOperationException("We no longer support writing Geli");
}
/** finish writing, closing any open files. */

View File

@ -2,22 +2,13 @@ package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
/**
@ -71,83 +62,8 @@ public class GeliTextWriter implements GeliGenotypeWriter {
* @param vc the variant context representing the call to add
* @param refBase required by the inteface; not used by this writer.
*/
public void addCall(VariantContext vc, byte refBase) {
char ref = vc.getReference().toString().charAt(0);
if ( vc.getNSamples() != 1 )
throw new IllegalArgumentException("The Geli format does not support multi-sample or no-calls");
Genotype genotype = vc.getGenotypes().values().iterator().next();
if ( genotype.isNoCall() )
throw new IllegalArgumentException("The Geli format does not support no-calls");
ReadBackedPileup pileup;
double[] posteriors;
if ( genotype instanceof CalledGenotype ) {
pileup = ((CalledGenotype)genotype).getReadBackedPileup();
posteriors = ((CalledGenotype)genotype).getPosteriors();
} else {
pileup = (ReadBackedPileup)genotype.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY);
posteriors = (double[])genotype.getAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY);
}
if ( posteriors == null )
throw new IllegalArgumentException("The Geli format requires posteriors");
double[] lks;
lks = Arrays.copyOf(posteriors, posteriors.length);
Arrays.sort(lks);
double nextVrsBest = lks[9] - lks[8];
double nextVrsRef = 0;
if (ref != 'X')
nextVrsRef = lks[9] - posteriors[DiploidGenotype.createHomGenotype((byte)ref).ordinal()];
int readCount = 0;
double maxMappingQual = 0;
if ( pileup != null ) {
readCount = pileup.size();
for (PileupElement p : pileup ) {
if ( maxMappingQual < p.getMappingQual() )
maxMappingQual = p.getMappingQual();
}
}
// if we've stored the max mapping qual value in the genotype get it there
if (maxMappingQual == 0 && genotype.hasAttribute(MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY))
maxMappingQual = (double)genotype.getAttributeAsInt(MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY);
// if we've stored the read count value in the genotype get it there
if (readCount == 0 && genotype.hasAttribute(READ_COUNT_ATTRIBUTE_KEY))
readCount = genotype.getAttributeAsInt(READ_COUNT_ATTRIBUTE_KEY);
ArrayList<Character> alleles = new ArrayList<Character>();
for ( Allele a : genotype.getAlleles() )
alleles.add(a.toString().charAt(0));
Collections.sort(alleles);
StringBuffer sb = new StringBuffer();
for ( Character base : alleles )
sb.append(base);
mWriter.println(String.format("%s %16d %c %8d %.0f %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
vc.getChr(),
vc.getStart(),
ref,
readCount,
maxMappingQual,
sb.toString(),
nextVrsRef,
nextVrsBest,
posteriors[0],
posteriors[1],
posteriors[2],
posteriors[3],
posteriors[4],
posteriors[5],
posteriors[6],
posteriors[7],
posteriors[8],
posteriors[9]));
mWriter.flush(); // necessary so that writing to an output stream will work
public void add(VariantContext vc, byte refBase) {
throw new UnsupportedOperationException("We no longer support writing Geli");
}
public void addGenotypeLikelihoods(GenotypeLikelihoods gl) {

View File

@ -3,17 +3,9 @@ package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream;
import org.broad.tribble.util.variantcontext.Genotype;
import org.broad.tribble.util.variantcontext.MutableGenotype;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFConstants;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.DataOutputStream;
import java.io.File;
@ -151,67 +143,8 @@ public class GLFWriter implements GLFGenotypeWriter {
* @param vc the variant context representing the call to add
* @param refBase not used by this writer
*/
public void addCall(VariantContext vc, byte refBase) {
if ( headerText == null )
throw new IllegalStateException("The GLF Header must be written before calls can be added");
char ref = vc.getReference().toString().charAt(0);
if ( vc.getNSamples() != 1 )
throw new IllegalArgumentException("The GLF format does not support multi-sample or no-calls");
Genotype genotype = vc.getGenotypes().values().iterator().next();
if ( genotype.isNoCall() )
throw new IllegalArgumentException("The GLF format does not support no-calls");
ReadBackedPileup pileup;
double[] likelihoods;
if ( genotype instanceof CalledGenotype) {
pileup = ((CalledGenotype)genotype).getReadBackedPileup();
likelihoods = ((CalledGenotype)genotype).getLikelihoods();
} else {
pileup = (ReadBackedPileup)genotype.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY);
likelihoods = (double[])genotype.getAttribute(CalledGenotype.LIKELIHOODS_ATTRIBUTE_KEY);
}
if ( likelihoods == null )
throw new IllegalArgumentException("The GLF format requires likelihoods");
LikelihoodObject obj = new LikelihoodObject(likelihoods, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG); // transform! ... to negitive log likelihoods
// calculate the RMS mapping qualities and the read depth
double rms = 0.0;
int readCount = 0;
if ( pileup != null) {
rms = calculateRMS(pileup);
readCount = pileup.size();
}
// if we can't get the rms from the read pile-up (preferred), check the tags, the VC might have it
if (genotype.hasAttribute(RMS_MAPPING_QUAL) && new Double(0.0).equals(rms))
rms = (Double)((MutableGenotype)genotype).getAttribute(RMS_MAPPING_QUAL);
// if we can't get the depth from the read pile-up (preferred), check the tags, the VC might have it
if (genotype.hasAttribute(VCFConstants.DEPTH_KEY) && 0 == readCount)
readCount = (Integer)((MutableGenotype)genotype).getAttribute(VCFConstants.DEPTH_KEY);
addCall(GenomeLocParser.getContigInfo(vc.getChr()), vc.getStart(), (float) rms, ref, readCount, obj);
}
/**
* calculate the rms , given the read pileup
*
* @param pileup the pileup
*
* @return the rms of the read mapping qualities
*/
private double calculateRMS(ReadBackedPileup pileup) {
int[] qualities = new int[pileup.size()];
int index = 0;
for (PileupElement p : pileup )
qualities[index++] = p.getMappingQual();
return MathUtils.rms(qualities);
public void add(VariantContext vc, byte refBase) {
throw new UnsupportedOperationException("We no longer support writing GLF");
}
/**

View File

@ -1,127 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
import org.broadinstitute.sting.utils.StingException;
import org.broad.tribble.vcf.VCFHeaderLine;
import org.broad.tribble.vcf.VCFHeader;
import org.broad.tribble.vcf.VCFFormatHeaderLine;
import org.broad.tribble.vcf.VCFCodec;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.index.Index;
import java.io.*;
import java.util.HashSet;
import java.util.Iterator;
/**
* @author ebanks
* <p/>
* Class VCFGenotypeWriterAdapter
* <p/>
* Adapt the VCF writter to the genotype output system
*/
public class VCFGenotypeWriterAdapter extends VCFWriter implements VCFGenotypeWriter {
// allowed genotype format strings
private HashSet<String> allowedGenotypeFormatKeys = null;
public VCFGenotypeWriterAdapter(File writeTo) {
super(writeTo);
}
public VCFGenotypeWriterAdapter(OutputStream writeTo) {
super(writeTo);
}
public void addCall(VariantContext vc, byte ref) {
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatKeys);
add(vc, ref);
}
public void writeHeader(VCFHeader header) {
for ( VCFHeaderLine field : header.getMetaData() ) {
if ( field instanceof VCFFormatHeaderLine ) {
if ( allowedGenotypeFormatKeys == null )
allowedGenotypeFormatKeys = new HashSet<String>();
allowedGenotypeFormatKeys.add(((VCFFormatHeaderLine)field).getName());
}
}
super.writeHeader(header);
}
public void append(File file) {
// if we don't need to restrict the FORMAT fields, then read blindly
if ( allowedGenotypeFormatKeys == null )
blindAppend(file);
else
smartAppend(file);
}
private void blindAppend(File file) {
try {
BufferedReader reader = new BufferedReader(new FileReader(file));
String line = reader.readLine();
while ( line != null ) {
if ( !VCFHeaderLine.isHeaderLine(line) ) {
mWriter.write(line);
mWriter.write("\n");
}
line = reader.readLine();
}
reader.close();
} catch (IOException e) {
throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e);
}
}
private void smartAppend(File file) {
try {
VCFCodec codec = new VCFCodec();
Index index = TribbleRMDTrackBuilder.loadIndex(file, codec, false);
BasicFeatureSource<VariantContext> vcfReader = new BasicFeatureSource(file.getAbsolutePath(),index,codec);
Iterator<VariantContext> iterator = vcfReader.iterator();
while ( iterator.hasNext() ) {
VariantContext vc = iterator.next();
vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatKeys);
add(vc, vc.getReferenceBaseForIndel());
}
vcfReader.close();
} catch (FileNotFoundException e) {
throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e);
} catch (IOException e) {
throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e);
}
}
}

View File

@ -10,8 +10,6 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.CalledGenotype;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.*;
import java.util.*;
@ -305,10 +303,6 @@ public class VCFWriter {
else {
val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
}
} else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) {
ReadBackedPileup pileup = (ReadBackedPileup)g.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY);
if ( pileup != null )
val = pileup.size();
} else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED);
}

View File

@ -0,0 +1,74 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.vcf;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.broad.tribble.vcf.*;
import java.io.*;
/**
* @author ebanks
* <p/>
* Class GATKVCFWriter
* <p/>
* GATK-specific version of the VCF Writer
*/
public class GATKVCFWriter extends VCFWriter implements VCFGenotypeWriter {
public GATKVCFWriter(File writeTo) {
super(writeTo);
}
public GATKVCFWriter(OutputStream writeTo) {
super(writeTo);
}
public void writeHeader(VCFHeader header) {
// TODO -- put the command-line generating code for the header right here
super.writeHeader(header);
}
public void append(File file) {
try {
BufferedReader reader = new BufferedReader(new FileReader(file));
String line = reader.readLine();
while ( line != null ) {
if ( !VCFHeaderLine.isHeaderLine(line) ) {
mWriter.write(line);
mWriter.write("\n");
}
line = reader.readLine();
}
reader.close();
} catch (IOException e) {
throw new StingException("Error reading file " + file + " in GATKVCFWriter: ", e);
}
}
}

View File

@ -1,214 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import edu.mit.broad.picard.genotype.geli.GeliFileReader;
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broad.tribble.gelitext.GeliTextCodec;
import org.broad.tribble.gelitext.GeliTextFeature;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
*
* @author aaron
*
* Class VariantContextAdaptorsUnitTest
*
* This test class exists to test input -> output of variant formats
* run through the VariantContext class.
*/
public class VariantContextAdaptorsUnitTest extends BaseTest {
public static IndexedFastaSequenceFile seq = null;
@BeforeClass
public static void beforeClass() {
seq = new IndexedFastaSequenceFile(new File(b36KGReference));
GenomeLocParser.setupRefContigOrdering(seq);
}
/**
* this test takes a known Geli file, reads in the records (storing them into an array),
* and creates VariantContext records. These VC records are then outputted through a genotype writer,
* and then read back in off of disk and compared to the original records. This way we are positive all
* the information that encodes a Geli makes it into the VC and then out to disk.
*/
@Test
public void testVariantContextGeliTextToGeliText() {
// our input and output files
File knownFile = new File(validationDataLocation + "/well_formed.geli"); // our known good geli
File tempFile = new File("temp.geli"); // our temporary geli output -> input file
tempFile.deleteOnExit(); // delete when we're done
// create our genotype writer for GLFs
GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI,tempFile);
((GeliTextWriter)gw).writeHeader(null); // the write header command ignores the parameter
GeliTextFeature geliText;
GeliTextCodec codec = new GeliTextCodec();
// buffer the records we see
List<GeliTextFeature> records = new ArrayList<GeliTextFeature>();
// a little more complicated than the GLF example, we have to read the file in
AsciiLineReader reader = createReader(knownFile);
// get the first real line (non-header)
String line = cleanHeaderFromFile(reader);
// while we have records, make a Variant Context and output it to a GLF file
while (line != null && line != "") {
geliText = (GeliTextFeature)codec.decode(line);
records.add(geliText); // we know they're all single calls in the reference file
VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText, null);
if (vc != null) gw.addCall(vc,(byte)' ');
line = readLine(reader);
}
gw.close(); // close the file
reader.close();
// buffer the new records we see
List<GeliTextFeature> records2 = new ArrayList<GeliTextFeature>();
reader = createReader(tempFile);
// get the first real line (non-header)
line = cleanHeaderFromFile(reader);
// while we have records, make a Variant Context and output it to a GLF file
while (line != null && line != "") {
geliText = (GeliTextFeature)codec.decode(line);
records2.add(geliText); // we know they're all single calls in the reference file
line = readLine(reader);
}
gw.close(); // close the file
reader.close();
// compare sizes
Assert.assertEquals("The input Geli file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
// now compare each record
for (int x = 0; x < records.size(); x++)
if(!records.get(x).equals(records2.get(x))) {
System.err.println("Record 1 :" + records.get(x).toString());
System.err.println("Record 2 :" + records2.get(x).toString());
Assert.fail("Geli Text Records were not preserved when cycling them to and from disc");
}
}
/**
* this test takes a known GeliBinary file, reads in the records (storing them into an array),
* and creates VariantContext records. These VC records are then outputted through a genotype writer,
* and then read back in off of disk and compared to the original records. This way we are positive all
* the information that encodes a Geli makes it into the VC and then out to disk.
*/
@Test
public void testVariantContextGeliBinaryToGeliBinary() {
// our input and output files
File knownFile = new File(validationDataLocation + "large_test_geli_binary.geli"); // our known good geli
File tempFile = new File("temp_binary.geli"); // our temporary geli output -> input file
tempFile.deleteOnExit(); // delete when we're done
// create our genotype writer for GLFs
GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY,tempFile);
// buffer the records we see
List<rodGELI> records = new ArrayList<rodGELI>();
// a little more complicated than the GLF example, we have to read the file in
GeliFileReader reader = new GeliFileReader(knownFile);
((GeliGenotypeWriter)gw).writeHeader(reader.getFileHeader()); // the write header command ignores the parameter
CloseableIterator<GenotypeLikelihoods> iterator = reader.iterator();
// while we have records, make a Variant Context and output it to a GeliBinary file
while (iterator.hasNext()) {
rodGELI gel = new rodGELI("myROD",iterator.next());
records.add(gel);
VariantContext vc = VariantContextAdaptors.toVariantContext("myROD",gel, null);
if (vc != null) gw.addCall(vc,(byte)' ');
}
iterator.close();
gw.close(); // close the file
reader.close();
// buffer the new records we see
List<rodGELI> records2 = new ArrayList<rodGELI>();
reader = new GeliFileReader(tempFile);
iterator = reader.iterator();
while (iterator.hasNext()) {
rodGELI gel = new rodGELI("myROD",iterator.next());
records2.add(gel);
}
iterator.close();
reader.close();
// compare sizes
Assert.assertEquals("The input GeliBinary file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size());
// now compare each record
for (int x = 0; x < records.size(); x++) {
if(!records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord())) {
Assert.fail("GeliBinary Records were not preserved when cycling them to and from disc");
System.err.println("Record 1 :" + records.get(x).toString());
System.err.println("Record 2 :" + records2.get(x).toString());
}
}
}
/**
* clean header lines form a geli file
* @param reader the reader
* @return the first line that's not a header
*/
private String cleanHeaderFromFile(AsciiLineReader reader) {
String line = "#";
while (line != null && line.startsWith("#"))
line = readLine(reader);
return line;
}
/**
* create a reader, given a file and the reader object
* @param knownFile the file to read in
* @return AsciiLineReader the ascii line reader
*/
private AsciiLineReader createReader(File knownFile) {
AsciiLineReader reader = null;
try {
reader = new AsciiLineReader(new FileInputStream(knownFile));
} catch (FileNotFoundException e) {
Assert.fail("File not found: " + knownFile);
}
return reader;
}
/**
* read a line from the specified reader. A method to make the above code look cleaner
* @param reader the ascii line reader
* @return a line of text
*/
public String readLine(AsciiLineReader reader) {
try {
String line = reader.readLine();
return line;
} catch (IOException e) {
return null;
}
}
}

View File

@ -106,30 +106,6 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
executeTest("testConfidence2", spec2);
}
// --------------------------------------------------------------------------------------------------------------
//
// testing other output formats
//
// --------------------------------------------------------------------------------------------------------------
@Test
public void testOtherFormat() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "GLF", "b3d463eb0b7e59604296747e1eb7103c" );
e.put( "GELI_BINARY", "764a0fed1b3cf089230fd91f3be9c2df" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -vf " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("testOtherFormat[%s]", entry.getKey()), spec);
}
}
// --------------------------------------------- //
// ALL REMAINING TESTS ARE OUTPUT IN GELI FORMAT //
// --------------------------------------------- //
// --------------------------------------------------------------------------------------------------------------
//
// testing heterozygosity
@ -138,12 +114,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testHeterozyosity() {
HashMap<Double, String> e = new HashMap<Double, String>();
e.put( 0.01, "ee390f91867e8729b96220115e56ddb3" );
e.put( 1.0 / 1850, "f96ad0ed71449bdb16b0c5561303a05a" );
e.put( 0.01, "79be222f9c5fa7091723b8172c236a22" );
e.put( 1.0 / 1850, "e217d213a7ebaf9c4bd41f79234055cc" );
for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -vf GELI -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1,
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
}
@ -158,12 +134,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test
public void testOtherBaseCallModel() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "one_state", "bcc983210b576d9fd228a67c5b9f372a" );
e.put( "three_state", "2db3a5f3d46e13e2f44c34fbb7e7936f" );
e.put( "one_state", "8e7a18b71c08d655df15b3e5204ae924" );
e.put( "three_state", "eacf6c82bf81d20f3fc4a74051869375" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -vf GELI -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1,
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1,
Arrays.asList(entry.getValue()));
executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec);
}
@ -180,10 +156,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
baseCommand +
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" +
" -varout %s" +
" -L 1:10,000,000-10,100,000" +
" -vf GELI",
" -L 1:10,000,000-10,100,000",
1,
Arrays.asList("f67c690bf2e4eee2bb7c58b6646a2a98"));
Arrays.asList("99102fb35f0f33f582da8dbde64d80cd"));
executeTest(String.format("testMultiTechnologies"), spec);
}