From ca5b274f16bb17c5ab823677b77f8e9bff07da13 Mon Sep 17 00:00:00 2001 From: ebanks Date: Tue, 10 Aug 2010 04:18:29 +0000 Subject: [PATCH] Unit, integration, and performance tests are all busted, so this is a good time to make a big commit... Major cleanup of the genotype writer code from the calling end. UG no longer supports making calls in anything but VCF, and that allows us to use the VCFWriter more generically now. Putting the ball in Matt's court to finish collapsing everything. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3996 348d0f76-0448-11de-a6fe-93d51630548a --- .../io/storage/GLFGenotypeWriterStorage.java | 67 ------ .../GeliBinaryGenotypeWriterStorage.java | 65 ------ .../GeliTextGenotypeWriterStorage.java | 64 ------ .../io/storage/GenotypeWriterStorage.java | 12 +- .../sting/gatk/io/storage/StorageFactory.java | 42 +--- .../gatk/io/stubs/GLFGenotypeWriterStub.java | 63 ------ .../stubs/GeliBinaryGenotypeWriterStub.java | 51 ----- .../io/stubs/GeliTextGenotypeWriterStub.java | 61 ----- .../GenotypeWriterArgumentTypeDescriptor.java | 33 +-- .../gatk/io/stubs/GenotypeWriterStub.java | 4 +- .../gatk/refdata/VariantContextAdaptors.java | 14 +- .../walkers/genotyper/BatchedCallsMerger.java | 5 +- .../DiploidGenotypeCalculationModel.java | 12 +- .../genotyper/GenotypeCalculationModel.java | 5 - .../GenotypeCalculationModelFactory.java | 9 +- .../walkers/genotyper/UnifiedGenotyper.java | 5 +- .../genotyper/UnifiedGenotyperEngine.java | 23 +- .../sting/utils/genotype/CalledGenotype.java | 48 ---- .../sting/utils/genotype/GenotypeWriter.java | 4 +- .../utils/genotype/GenotypeWriterFactory.java | 52 +---- .../utils/genotype/geli/GeliAdapter.java | 80 +------ .../utils/genotype/geli/GeliTextWriter.java | 88 +------ .../sting/utils/genotype/glf/GLFWriter.java | 71 +----- .../vcf/VCFGenotypeWriterAdapter.java | 127 ----------- .../sting/utils/genotype/vcf/VCFWriter.java | 6 - .../sting/utils/vcf/GATKVCFWriter.java | 74 ++++++ .../VariantContextAdaptorsUnitTest.java | 214 ------------------ .../UnifiedGenotyperIntegrationTest.java | 41 +--- 28 files changed, 128 insertions(+), 1212 deletions(-) delete mode 100644 java/src/org/broadinstitute/sting/gatk/io/storage/GLFGenotypeWriterStorage.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/io/storage/GeliBinaryGenotypeWriterStorage.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/io/storage/GeliTextGenotypeWriterStorage.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/io/stubs/GLFGenotypeWriterStub.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/io/stubs/GeliBinaryGenotypeWriterStub.java delete mode 100644 java/src/org/broadinstitute/sting/gatk/io/stubs/GeliTextGenotypeWriterStub.java delete mode 100755 java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java delete mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java create mode 100644 java/src/org/broadinstitute/sting/utils/vcf/GATKVCFWriter.java delete mode 100644 java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GLFGenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GLFGenotypeWriterStorage.java deleted file mode 100644 index 7fbf27874..000000000 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GLFGenotypeWriterStorage.java +++ /dev/null @@ -1,67 +0,0 @@ -package org.broadinstitute.sting.gatk.io.storage; - -import org.broadinstitute.sting.utils.genotype.glf.GLFReader; -import org.broadinstitute.sting.utils.genotype.glf.GLFRecord; -import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter; -import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; - -import java.io.File; - -/** - * Provides temporary and permanent storage for genotypes in GLF format. - * - * @author mhanna - * @version 0.1 - */ -public class GLFGenotypeWriterStorage extends GenotypeWriterStorage implements GLFGenotypeWriter { - /** - * Creates new (permanent) storage for GLF genotype writers. - * @param stub Stub containing appropriate input parameters. - */ - public GLFGenotypeWriterStorage(GenotypeWriterStub stub) { - super(stub); - } - - /** - * Creates new (temporary) storage for GLF genotype writers. - * @param stub Stub containing appropriate input parameters. - * @param target Target file for output data. - */ - public GLFGenotypeWriterStorage(GenotypeWriterStub stub, File target) { - super(stub,target); - } - - /** - * Write the geli header to the target file. - * @param headerText The header to write. - */ - public void writeHeader(String headerText) { - ((GLFGenotypeWriter)writer).writeHeader(headerText); - } - - /** - * add a GLF record to the output file - * - * @param contigName the contig name - * @param contigLength the contig length - * @param rec the GLF record to write. - */ - public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) { - ((GLFGenotypeWriter)writer).addGLFRecord(contigName,contigLength,rec); - } - - /** - * Merges the stream backing up this temporary storage into the target. - * @param target Target stream for the temporary storage. May not be null. - */ - public void mergeInto(GLFGenotypeWriter target) { - GLFReader reader = new GLFReader(file); - while ( reader.hasNext() ) { - GLFRecord rec = reader.next(); - target.addGLFRecord(rec.getContig(),(int)rec.getPosition(),rec); - } - reader.close(); - - file.delete(); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GeliBinaryGenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GeliBinaryGenotypeWriterStorage.java deleted file mode 100644 index 24ad70f4b..000000000 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GeliBinaryGenotypeWriterStorage.java +++ /dev/null @@ -1,65 +0,0 @@ -package org.broadinstitute.sting.gatk.io.storage; - -import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; -import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; - -import java.io.File; - -import edu.mit.broad.picard.genotype.geli.GeliFileReader; -import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; -import net.sf.samtools.SAMFileHeader; - -/** - * Provides temporary and permanent storage for genotypes in Geli binary format. - * - * @author mhanna - * @version 0.1 - */ -public class GeliBinaryGenotypeWriterStorage extends GenotypeWriterStorage implements GeliGenotypeWriter { - /** - * Creates new (permanent) storage for geli binary genotype writers. - * @param stub Stub containing appropriate input parameters. - */ - public GeliBinaryGenotypeWriterStorage(GenotypeWriterStub stub) { - super(stub); - } - - /** - * Creates new (temporary) storage for geli binary genotype writers. - * @param stub Stub containing appropriate input parameters. - * @param target Target file for output data. - */ - public GeliBinaryGenotypeWriterStorage(GenotypeWriterStub stub, File target) { - super(stub,target); - } - - /** - * Write the geli header to the target file. - * @param header The header to write. - */ - public void writeHeader(SAMFileHeader header) { - ((GeliGenotypeWriter)writer).writeHeader(header); - } - - /** - * Writes the genotype likelihoods to the output. - * @param gl genotype likelihoods to write. - */ - public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { - ((GeliGenotypeWriter)writer).addGenotypeLikelihoods(gl); - } - - - /** - * Merges the stream backing up this temporary storage into the target. - * @param target Target stream for the temporary storage. May not be null. - */ - public void mergeInto(GeliGenotypeWriter target) { - GeliFileReader reader = new GeliFileReader(file); - while ( reader.hasNext() ) - target.addGenotypeLikelihoods(reader.next()); - reader.close(); - - file.delete(); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GeliTextGenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GeliTextGenotypeWriterStorage.java deleted file mode 100644 index 4113c0bae..000000000 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GeliTextGenotypeWriterStorage.java +++ /dev/null @@ -1,64 +0,0 @@ -package org.broadinstitute.sting.gatk.io.storage; - -import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; -import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; - -import java.io.File; - -import edu.mit.broad.picard.genotype.geli.GeliFileReader; -import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; -import net.sf.samtools.SAMFileHeader; - -/** - * Provides temporary and permanent storage for genotypes in Geli text format. - * - * @author mhanna - * @version 0.1 - */ -public class GeliTextGenotypeWriterStorage extends GenotypeWriterStorage implements GeliGenotypeWriter { - /** - * Creates new (permanent) storage for geli text genotype writers. - * @param stub Stub containing appropriate input parameters. - */ - public GeliTextGenotypeWriterStorage(GenotypeWriterStub stub) { - super(stub); - } - - /** - * Creates new (temporary) storage for geli text genotype writers. - * @param stub Stub containing appropriate input parameters. - * @param target Target file for output data. - */ - public GeliTextGenotypeWriterStorage(GenotypeWriterStub stub, File target) { - super(stub,target); - } - - /** - * Write the geli header to the target file. - * @param header The header to write. - */ - public void writeHeader(SAMFileHeader header) { - ((GeliGenotypeWriter)writer).writeHeader(header); - } - - /** - * Writes the genotype likelihoods to the output. - * @param gl genotype likelihoods to write. - */ - public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { - ((GeliGenotypeWriter)writer).addGenotypeLikelihoods(gl); - } - - /** - * Merges the stream backing up this temporary storage into the target. - * @param target Target stream for the temporary storage. May not be null. - */ - public void mergeInto(GeliGenotypeWriter target) { - GeliFileReader reader = new GeliFileReader(file); - while ( reader.hasNext() ) - target.addGenotypeLikelihoods(reader.next()); - reader.close(); - - file.delete(); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java index 46d2f2479..df559225f 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -55,9 +55,9 @@ public abstract class GenotypeWriterStorage implements this.file = stub.getFile(); this.stream = stub.getOutputStream(); if(file != null) - writer = GenotypeWriterFactory.create(stub.getFormat(), file); + writer = GenotypeWriterFactory.create(file); else if(stream != null) - writer = GenotypeWriterFactory.create(stub.getFormat(), stream); + writer = GenotypeWriterFactory.create(stream); else throw new StingException("Unable to create target to which to write; storage was provided with neither a file nor a stream."); } @@ -70,13 +70,13 @@ public abstract class GenotypeWriterStorage implements public GenotypeWriterStorage( GenotypeWriterStub stub, File file ) { this.file = file; this.stream = null; - writer = GenotypeWriterFactory.create(stub.getFormat(), file); + writer = GenotypeWriterFactory.create(file); Set samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader()); - GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), new VCFHeader(null, samples)); + GenotypeWriterFactory.writeHeader(writer, new VCFHeader(null, samples)); } - public void addCall(VariantContext vc, byte ref) { - writer.addCall(vc, ref); + public void add(VariantContext vc, byte ref) { + writer.add(vc, ref); } public void close() { diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java b/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java index 1d3a35e58..c4aecc29b 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/StorageFactory.java @@ -63,7 +63,7 @@ public class StorageFactory { * @return Storage object with a facade of type T. */ public static Storage createStorage( Stub stub, File file ) { - Storage storage = null; + Storage storage; if(stub instanceof OutputStreamStub) { if( file != null ) @@ -79,42 +79,10 @@ public class StorageFactory { } else if(stub instanceof GenotypeWriterStub) { GenotypeWriterStub genotypeWriterStub = (GenotypeWriterStub)stub; - if( file != null ) { - switch(genotypeWriterStub.getFormat()) { - case GELI: - storage = new GeliTextGenotypeWriterStorage(genotypeWriterStub,file); - break; - case GELI_BINARY: - storage = new GeliBinaryGenotypeWriterStorage(genotypeWriterStub,file); - break; - case GLF: - storage = new GLFGenotypeWriterStorage(genotypeWriterStub,file); - break; - case VCF: - storage = new VCFGenotypeWriterStorage(genotypeWriterStub,file); - break; - default: - throw new StingException("Unsupported genotype file format: " + genotypeWriterStub.getFormat()); - } - } - else { - switch(genotypeWriterStub.getFormat()) { - case GELI: - storage = new GeliTextGenotypeWriterStorage(genotypeWriterStub); - break; - case GELI_BINARY: - storage = new GeliBinaryGenotypeWriterStorage(genotypeWriterStub); - break; - case GLF: - storage = new GLFGenotypeWriterStorage(genotypeWriterStub); - break; - case VCF: - storage = new VCFGenotypeWriterStorage(genotypeWriterStub); - break; - default: - throw new StingException("Unsupported genotype file format: " + genotypeWriterStub.getFormat()); - } - } + if( file != null ) + storage = new VCFGenotypeWriterStorage(genotypeWriterStub,file); + else + storage = new VCFGenotypeWriterStorage(genotypeWriterStub); } else throw new StingException("Unsupported stub type: " + stub.getClass().getName()); diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GLFGenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GLFGenotypeWriterStub.java deleted file mode 100644 index 1f540bf19..000000000 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GLFGenotypeWriterStub.java +++ /dev/null @@ -1,63 +0,0 @@ -package org.broadinstitute.sting.gatk.io.stubs; - -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter; -import org.broadinstitute.sting.utils.genotype.glf.GLFRecord; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; - -import java.io.File; -import java.io.PrintStream; - -/** - * Stub providing a passthrough for GLF files. - * - * @author mhanna - * @version 0.1 - */ -public class GLFGenotypeWriterStub extends GenotypeWriterStub implements GLFGenotypeWriter { - /** - * Construct a new stub with the given engine and target file. - * @param engine The engine, for extracting command-line arguments, etc. - * @param genotypeFile Target file into which to write genotyping data. - */ - public GLFGenotypeWriterStub(GenomeAnalysisEngine engine, File genotypeFile) { - super(engine,genotypeFile); - } - - /** - * Construct a new stub with the given engine and target stream. - * @param engine The engine, for extracting command-line arguments, etc. - * @param genotypeStream Target stream into which to write genotyping data. - */ - public GLFGenotypeWriterStub(GenomeAnalysisEngine engine, PrintStream genotypeStream) { - super(engine,genotypeStream); - } - - /** - * Gets the format of this stub. We may want to discontinue use of this method and rely on instanceof comparisons. - * @return GLF always. - */ - public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() { - return GenotypeWriterFactory.GENOTYPE_FORMAT.GLF; - } - - /** - * Write the GLF header to the target file. - * @param headerText The header to write. - */ - public void writeHeader(String headerText) { - outputTracker.getStorage(this).writeHeader(headerText); - } - - /** - * add a GLF record to the output file - * - * @param contigName the contig name - * @param contigLength the contig length - * @param rec the GLF record to write. - */ - public void addGLFRecord(String contigName, int contigLength, GLFRecord rec) { - outputTracker.getStorage(this).addGLFRecord(contigName, contigLength, rec); - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GeliBinaryGenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GeliBinaryGenotypeWriterStub.java deleted file mode 100644 index 7c186c9f1..000000000 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GeliBinaryGenotypeWriterStub.java +++ /dev/null @@ -1,51 +0,0 @@ -package org.broadinstitute.sting.gatk.io.stubs; - -import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import net.sf.samtools.SAMFileHeader; - -import java.io.File; - -import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; - -/** - * Stub providing a passthrough for geli binary files. - * - * @author mhanna - * @version 0.1 - */ -public class GeliBinaryGenotypeWriterStub extends GenotypeWriterStub implements GeliGenotypeWriter { - /** - * Construct a new stub with the given engine and target file. - * @param engine The engine, for extracting command-line arguments, etc. - * @param genotypeFile Target file into which to write genotyping data. - */ - public GeliBinaryGenotypeWriterStub(GenomeAnalysisEngine engine,File genotypeFile) { - super(engine,genotypeFile); - } - - /** - * Gets the format of this stub. We may want to discontinue use of this method and rely on instanceof comparisons. - * @return GELI_BINARY always. - */ - public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() { - return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY; - } - - /** - * Write the geli header to the target file. - * @param header The header to write. - */ - public void writeHeader(SAMFileHeader header) { - outputTracker.getStorage(this).writeHeader(header); - } - - /** - * Writes the genotype likelihoods to the output. - * @param gl genotype likelihoods to write. - */ - public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { - outputTracker.getStorage(this).addGenotypeLikelihoods(gl); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GeliTextGenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GeliTextGenotypeWriterStub.java deleted file mode 100644 index 1af3c7651..000000000 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GeliTextGenotypeWriterStub.java +++ /dev/null @@ -1,61 +0,0 @@ -package org.broadinstitute.sting.gatk.io.stubs; - -import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; - -import java.io.File; -import java.io.PrintStream; - -import net.sf.samtools.SAMFileHeader; -import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; - -/** - * Stub providing a passthrough for geli text files. - * - * @author mhanna - * @version 0.1 - */ -public class GeliTextGenotypeWriterStub extends GenotypeWriterStub implements GeliGenotypeWriter { - /** - * Construct a new stub with the given engine and target file. - * @param engine The engine, for extracting command-line arguments, etc. - * @param genotypeFile Target file into which to write genotyping data. - */ - public GeliTextGenotypeWriterStub(GenomeAnalysisEngine engine, File genotypeFile) { - super(engine,genotypeFile); - } - - /** - * Construct a new stub with the given engine and target stream. - * @param engine The engine, for extracting command-line arguments, etc. - * @param genotypeStream Target stream into which to write genotyping data. - */ - public GeliTextGenotypeWriterStub(GenomeAnalysisEngine engine, PrintStream genotypeStream) { - super(engine,genotypeStream); - } - - /** - * Gets the format of this stub. We may want to discontinue use of this method and rely on instanceof comparisons. - * @return GELI always. - */ - public GenotypeWriterFactory.GENOTYPE_FORMAT getFormat() { - return GenotypeWriterFactory.GENOTYPE_FORMAT.GELI; - } - - /** - * Write the geli header to the target file. - * @param header The header to write. - */ - public void writeHeader(SAMFileHeader header) { - outputTracker.getStorage(this).writeHeader(header); - } - - /** - * Writes the genotype likelihoods to the output. - * @param gl genotype likelihoods to write. - */ - public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { - outputTracker.getStorage(this).addGenotypeLikelihoods(gl); - } -} diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java index 2ac9c0314..2f75ab2fa 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterArgumentTypeDescriptor.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.io.stubs; import org.broadinstitute.sting.commandline.*; -import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -111,38 +110,8 @@ public class GenotypeWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor String writerFileName = getArgumentValue(createGenotypeFileArgumentDefinition(source),matches); File writerFile = writerFileName != null ? new File(writerFileName) : null; - // Get the format of the genotype object, if it exists. - String genotypeFormatText = getArgumentValue(createGenotypeFormatArgumentDefinition(source),matches); - GenotypeWriterFactory.GENOTYPE_FORMAT genotypeFormat = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - if(genotypeFormatText != null) { - try { - genotypeFormat = Enum.valueOf(GenotypeWriterFactory.GENOTYPE_FORMAT.class,genotypeFormatText); - } - catch(IllegalArgumentException ex) { - throw new StingException(String.format("Genotype format %s is invalid.",genotypeFormatText)); - } - } - // Create a stub for the given object. - GenotypeWriterStub stub; - switch(genotypeFormat) { - case GELI: - stub = (writerFile != null) ? new GeliTextGenotypeWriterStub(engine, writerFile) : new GeliTextGenotypeWriterStub(engine,System.out); - break; - case GELI_BINARY: - if(writerFile == null) - throw new StingException("Geli binary files cannot be output to the console."); - stub = new GeliBinaryGenotypeWriterStub(engine, writerFile); - break; - case GLF: - stub = (writerFile != null) ? new GLFGenotypeWriterStub(engine, writerFile) : new GLFGenotypeWriterStub(engine,System.out); - break; - case VCF: - stub = (writerFile != null) ? new VCFGenotypeWriterStub(engine, writerFile) : new VCFGenotypeWriterStub(engine,System.out); - break; - default: - throw new StingException("Unable to create stub for file format " + genotypeFormat); - } + GenotypeWriterStub stub = (writerFile != null) ? new VCFGenotypeWriterStub(engine, writerFile) : new VCFGenotypeWriterStub(engine,System.out); engine.addOutput(stub); diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java index 4d2ace45c..495188648 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/GenotypeWriterStub.java @@ -129,8 +129,8 @@ public abstract class GenotypeWriterStub implements St /** * @{inheritDoc} */ - public void addCall(VariantContext vc, byte ref) { - outputTracker.getStorage(this).addCall(vc,ref); + public void add(VariantContext vc, byte ref) { + outputTracker.getStorage(this).add(vc,ref); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 6fbaf0e77..462aa3d3e 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -14,8 +14,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.genotype.CalledGenotype; -import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; import java.util.*; @@ -279,9 +277,9 @@ public class VariantContextAdaptors { MutableGenotype call = new MutableGenotype(name, genotypeAlleles); // set the likelihoods, depth, and RMS mapping quality values - call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.getLikelihoods()); - call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaximumMappingQual()); - call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.getDepthOfCoverage()); + //call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY,geli.getLikelihoods()); + //call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY,geli.getMaximumMappingQual()); + //call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY,geli.getDepthOfCoverage()); // add the call to the genotype list, and then use this list to create a VariantContext genotypes.add(call); @@ -353,9 +351,9 @@ public class VariantContextAdaptors { post[x] = Double.valueOf(geli.getLikelihood(DiploidGenotype.fromBases((byte) gTypes[x].charAt(0), (byte) gTypes[x].charAt(1)))); // set the likelihoods, depth, and RMS mapping quality values - call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY, post); - call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY, (int) geli.getMaxMappingQuality()); - call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY, geli.getNumReads()); + //call.putAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY, post); + //call.putAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY, (int) geli.getMaxMappingQuality()); + //call.putAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY, geli.getNumReads()); // add the call to the genotype list, and then use this list to create a VariantContext genotypes.add(call); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java index c498d9d55..19eefcabc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -92,7 +91,7 @@ public class BatchedCallsMerger extends LocusWalker imp UG_engine.samples = samples; // initialize the header - GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), new VCFHeader(headerLines, samples)); + GenotypeWriterFactory.writeHeader(writer, new VCFHeader(headerLines, samples)); } public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { @@ -159,7 +158,7 @@ public class BatchedCallsMerger extends LocusWalker imp return sum; try { - writer.addCall(value, value.getReference().getBases()[0]); + writer.add(value, value.getReference().getBases()[0]); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name"); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index a29350f28..f0e4b1683 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -31,7 +31,6 @@ import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; @@ -150,20 +149,17 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul myAlleles.add(altAllele); } - CalledGenotype cg = new CalledGenotype(sample, myAlleles, AFbasedGenotype.second); - cg.setLikelihoods(GLs.get(sample).getLikelihoods()); - cg.setReadBackedPileup(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()); - cg.putAttribute(VCFConstants.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()); + HashMap attributes = new HashMap(); + attributes.put(VCFConstants.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()); - cg.setPosteriors(GLs.get(sample).getPosteriors()); double[] likelihoods = GLs.get(sample).getLikelihoods(); String GL = String.format("%.2f,%.2f,%.2f", likelihoods[refGenotype.ordinal()], likelihoods[hetGenotype.ordinal()], likelihoods[homGenotype.ordinal()]); - cg.putAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, GL); + attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, GL); - calls.put(sample, cg); + calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false)); } return calls; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index b10e105fa..c3208031a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import java.io.PrintStream; import java.util.Map; @@ -29,7 +28,6 @@ public abstract class GenotypeCalculationModel implements Cloneable { protected UnifiedArgumentCollection UAC; protected Set samples; protected Logger logger; - protected GenotypeWriterFactory.GENOTYPE_FORMAT OUTPUT_FORMAT; protected PrintStream verboseWriter; /** @@ -44,17 +42,14 @@ public abstract class GenotypeCalculationModel implements Cloneable { * @param samples samples in input bam * @param logger logger * @param UAC unified arg collection - * @param outputFormat output format * @param verboseWriter verbose writer */ protected void initialize(Set samples, Logger logger, UnifiedArgumentCollection UAC, - GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, PrintStream verboseWriter) { this.UAC = UAC.clone(); this.samples = new TreeSet(samples); - OUTPUT_FORMAT = outputFormat; this.logger = logger; this.verboseWriter = verboseWriter; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java index cbc6950e4..238b0bcb3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import static org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel.Model.*; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.apache.log4j.Logger; import java.util.Set; @@ -34,7 +33,6 @@ import java.io.PrintStream; public class GenotypeCalculationModelFactory { - //private GenotypeCalculationModelFactory() {} // cannot be instantiated public static GenotypeCalculationModel.Model getGenotypeCalculationModel(final String name) { return valueOf(name); @@ -46,7 +44,6 @@ public class GenotypeCalculationModelFactory { * @param samples samples in bam * @param logger logger * @param UAC the unified argument collection - * @param outputFormat the output format * @param verboseWriter verbose writer * * @return model @@ -54,7 +51,6 @@ public class GenotypeCalculationModelFactory { public static GenotypeCalculationModel makeGenotypeCalculation(Set samples, Logger logger, UnifiedArgumentCollection UAC, - GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, PrintStream verboseWriter) { GenotypeCalculationModel gcm; switch ( UAC.genotypeModel ) { @@ -63,16 +59,13 @@ public class GenotypeCalculationModelFactory { boolean useExptGenotypeLikelihoods = UAC.genotypeModel == JOINT_ESTIMATE_EXPT_GL; gcm = new DiploidGenotypeCalculationModel(useExptGenotypeLikelihoods); break; -// case POOLED: -// gcm = new PooledCalculationModel(); -// break; case INDELS: gcm = new SimpleIndelCalculationModel(); break; default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel); } - gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter); + gcm.initialize(samples, logger, UAC, verboseWriter); return gcm; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index c14ce44ce..3876ef8cd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broad.tribble.vcf.*; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; @@ -127,7 +126,7 @@ public class UnifiedGenotyper extends LocusWalker getHeaderInfo() { @@ -207,7 +206,7 @@ public class UnifiedGenotyper extends LocusWalker 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) { // the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name - throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads"); + throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads; please run again without the -nt argument"); } // get all of the unique sample names @@ -133,18 +125,7 @@ public class UnifiedGenotyperEngine { // initialize the GenotypeCalculationModel for this thread if that hasn't been done yet if ( gcm.get() == null ) { - GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - if ( genotypeWriter != null ) { - if ( genotypeWriter instanceof VCFGenotypeWriter ) - format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - else if ( genotypeWriter instanceof GLFGenotypeWriter) - format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF; - else if ( genotypeWriter instanceof GeliGenotypeWriter) - format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI; - else - throw new StingException("Unsupported genotype format: " + genotypeWriter.getClass().getName()); - } - gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter)); + gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, verboseWriter)); } byte ref = refContext.getBase(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java deleted file mode 100755 index 30143c92a..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/CalledGenotype.java +++ /dev/null @@ -1,48 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import org.broad.tribble.util.variantcontext.Allele; -import org.broad.tribble.util.variantcontext.MutableGenotype; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; - -import java.util.*; - -/** - * This class emcompasses all the basic information about a called genotype. - * - * @author ebanks - */ -public class CalledGenotype extends MutableGenotype { - - // key names for standard genotype attributes - public static final String LIKELIHOODS_ATTRIBUTE_KEY = "Likelihoods"; - public static final String POSTERIORS_ATTRIBUTE_KEY = "Posteriors"; - public static final String READBACKEDPILEUP_ATTRIBUTE_KEY = "ReadBackedPileup"; - - - public CalledGenotype(String sampleName, List alleles, double negLog10PError, Set filters, Map attributes, boolean genotypesArePhased) { - super(sampleName, alleles, negLog10PError, filters, attributes, genotypesArePhased); - } - - public CalledGenotype(String sampleName, List alleles, double negLog10PError) { - super(sampleName, alleles, negLog10PError); - } - - public CalledGenotype(String sampleName, List alleles) { - super(sampleName, alleles, NO_NEG_LOG_10PERROR, null, null, false); - } - - // --------------------------------------------------------------------------------------------------------- - // - // routines to modify useful attribute fields - // - // --------------------------------------------------------------------------------------------------------- - - public void setLikelihoods(double likelihoods[]) { putAttribute(LIKELIHOODS_ATTRIBUTE_KEY, likelihoods); } - public void setPosteriors(double posteriors[]) { putAttribute(POSTERIORS_ATTRIBUTE_KEY, posteriors); } - public void setReadBackedPileup(ReadBackedPileup pileup) { putAttribute(READBACKEDPILEUP_ATTRIBUTE_KEY, pileup); } - - public double[] getLikelihoods() { return (double[])getAttribute(LIKELIHOODS_ATTRIBUTE_KEY); } - public double[] getPosteriors() { return (double[])getAttribute(POSTERIORS_ATTRIBUTE_KEY); } - public ReadBackedPileup getReadBackedPileup() { return (ReadBackedPileup)getAttribute(READBACKEDPILEUP_ATTRIBUTE_KEY); } - -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 44a20d86b..901c3f321 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -37,11 +37,11 @@ import org.broad.tribble.util.variantcontext.VariantContext; */ public interface GenotypeWriter { /** - * Add a genotype, given a variant context + * Add a record, given a variant context, with the genotype fields restricted to what is defined in the header * @param vc the variant context representing the call to add * @param refBase This is required for VCF writers, as the VCF format explicitly requires (previous) ref base for an indel. */ - public void addCall(VariantContext vc, byte refBase); + public void add(VariantContext vc, byte refBase); /** finish writing, closing any open files. */ public void close(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index 78a5c269a..3031d0156 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -1,10 +1,7 @@ package org.broadinstitute.sting.utils.genotype; -import net.sf.samtools.SAMFileHeader; import org.broad.tribble.vcf.VCFHeader; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.geli.*; -import org.broadinstitute.sting.utils.genotype.glf.*; +import org.broadinstitute.sting.utils.vcf.GATKVCFWriter; import org.broadinstitute.sting.utils.genotype.vcf.*; import java.io.File; @@ -26,53 +23,18 @@ public class GenotypeWriterFactory { /** * create a genotype writer - * @param format the format * @param destination the destination file * @return the genotype writer object */ - public static GenotypeWriter create(GENOTYPE_FORMAT format, File destination) { - switch (format) { - case GLF: - return new GLFWriter(destination); - case GELI: - return new GeliTextWriter(destination); - case GELI_BINARY: - return new GeliAdapter(destination); - case VCF: - return new VCFGenotypeWriterAdapter(destination); - default: - throw new StingException("Genotype writer " + format.toString() + " is not implemented"); - } + public static GenotypeWriter create(File destination) { + return new GATKVCFWriter(destination); } - public static GenotypeWriter create(GENOTYPE_FORMAT format, PrintStream destination) { - switch (format) { - case GELI: - return new GeliTextWriter(destination); - case GLF: - return new GLFWriter(destination); - case VCF: - return new VCFGenotypeWriterAdapter(destination); - default: - throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented"); - } + public static GenotypeWriter create(PrintStream destination) { + return new GATKVCFWriter(destination); } - public static void writeHeader(GenotypeWriter writer, - SAMFileHeader header, - VCFHeader vcfHeader) { - // VCF - if ( writer instanceof VCFGenotypeWriter ) { - ((VCFGenotypeWriter)writer).writeHeader(vcfHeader); - } - // GELI - else if ( writer instanceof GeliGenotypeWriter ) { - ((GeliGenotypeWriter)writer).writeHeader(header); - } - // GLF - else if ( writer instanceof GLFGenotypeWriter ) { - ((GLFGenotypeWriter)writer).writeHeader(header.toString()); - } - // nothing to do for GELI TEXT + public static void writeHeader(GenotypeWriter writer, VCFHeader vcfHeader) { + ((VCFGenotypeWriter)writer).writeHeader(vcfHeader); } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index c2bdf3e69..1a594adc0 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -3,14 +3,7 @@ package org.broadinstitute.sting.utils.genotype.geli; import edu.mit.broad.picard.genotype.geli.GeliFileWriter; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMSequenceRecord; -import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.utils.genotype.CalledGenotype; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.pileup.PileupElement; import java.io.File; @@ -73,30 +66,6 @@ public class GeliAdapter implements GeliGenotypeWriter { this.writer = GeliFileWriter.newInstanceForPresortedRecords(writeTo, fileHeader); } - - /** - * add a single point genotype call to the genotype likelihood file - * - * @param contig the contig you're calling in - * @param position the position on the contig - * @param referenceBase the reference base - * @param maxMappingQuality the max MQ - * @param readCount the read count - * @param likelihoods the likelihoods of each of the possible alleles - */ - private void addCall(SAMSequenceRecord contig, - int position, - char referenceBase, - double maxMappingQuality, - int readCount, - LikelihoodObject likelihoods) { - GenotypeLikelihoods lk = likelihoods.convertToGenotypeLikelihoods(writer.getFileHeader(), contig.getSequenceIndex(), position, (byte) referenceBase); - lk.setNumReads(readCount); - - lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? Short.MAX_VALUE : (short)Math.round(maxMappingQuality)); - writer.addGenotypeLikelihoods(lk); - } - public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { if ( writer == null ) throw new IllegalStateException("The Geli Header must be written before records can be added"); @@ -110,53 +79,8 @@ public class GeliAdapter implements GeliGenotypeWriter { * @param vc the variant context representing the call to add * @param refBase not used by this writer */ - public void addCall(VariantContext vc, byte refBase) { - if ( writer == null ) - throw new IllegalStateException("The Geli Header must be written before calls can be added"); - - char ref = vc.getReference().toString().charAt(0); - if ( vc.getNSamples() != 1 ) - throw new IllegalArgumentException("The Geli format does not support multi-sample or no-calls"); - - Genotype genotype = vc.getGenotypes().values().iterator().next(); - if ( genotype.isNoCall() ) - throw new IllegalArgumentException("The Geli format does not support no-calls"); - - ReadBackedPileup pileup; - double[] posteriors; - if ( genotype instanceof CalledGenotype ) { - pileup = ((CalledGenotype)genotype).getReadBackedPileup(); - posteriors = ((CalledGenotype)genotype).getPosteriors(); - } else { - pileup = (ReadBackedPileup)genotype.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY); - posteriors = (double[])genotype.getAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY); - } - - if ( posteriors == null ) - throw new IllegalArgumentException("The Geli format requires posteriors"); - - int readCount = 0; - double maxMappingQual = 0; - if ( pileup != null ) { - readCount = pileup.size(); - for (PileupElement p : pileup ) { - if ( maxMappingQual < p.getMappingQual() ) - maxMappingQual = p.getMappingQual(); - } - } else { - if (genotype.hasAttribute(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY)) - readCount = genotype.getAttributeAsInt(GeliTextWriter.READ_COUNT_ATTRIBUTE_KEY); - if (genotype.hasAttribute(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY)) - maxMappingQual = Double.valueOf(genotype.getAttributeAsInt(GeliTextWriter.MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY)); - } - - LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG); - addCall(GenomeLocParser.getContigInfo(vc.getChr()), - vc.getStart(), - ref, - maxMappingQual, - readCount, - obj); + public void add(VariantContext vc, byte refBase) { + throw new UnsupportedOperationException("We no longer support writing Geli"); } /** finish writing, closing any open files. */ diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index b496bd071..9602dc9c6 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -2,22 +2,13 @@ package org.broadinstitute.sting.utils.genotype.geli; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import net.sf.samtools.SAMFileHeader; -import org.broad.tribble.util.variantcontext.Allele; -import org.broad.tribble.util.variantcontext.Genotype; import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.CalledGenotype; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; /** @@ -71,83 +62,8 @@ public class GeliTextWriter implements GeliGenotypeWriter { * @param vc the variant context representing the call to add * @param refBase required by the inteface; not used by this writer. */ - public void addCall(VariantContext vc, byte refBase) { - - char ref = vc.getReference().toString().charAt(0); - - if ( vc.getNSamples() != 1 ) - throw new IllegalArgumentException("The Geli format does not support multi-sample or no-calls"); - - Genotype genotype = vc.getGenotypes().values().iterator().next(); - if ( genotype.isNoCall() ) - throw new IllegalArgumentException("The Geli format does not support no-calls"); - - ReadBackedPileup pileup; - double[] posteriors; - if ( genotype instanceof CalledGenotype ) { - pileup = ((CalledGenotype)genotype).getReadBackedPileup(); - posteriors = ((CalledGenotype)genotype).getPosteriors(); - } else { - pileup = (ReadBackedPileup)genotype.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY); - posteriors = (double[])genotype.getAttribute(CalledGenotype.POSTERIORS_ATTRIBUTE_KEY); - } - - if ( posteriors == null ) - throw new IllegalArgumentException("The Geli format requires posteriors"); - - double[] lks; - lks = Arrays.copyOf(posteriors, posteriors.length); - Arrays.sort(lks); - - double nextVrsBest = lks[9] - lks[8]; - double nextVrsRef = 0; - if (ref != 'X') - nextVrsRef = lks[9] - posteriors[DiploidGenotype.createHomGenotype((byte)ref).ordinal()]; - - int readCount = 0; - double maxMappingQual = 0; - if ( pileup != null ) { - readCount = pileup.size(); - for (PileupElement p : pileup ) { - if ( maxMappingQual < p.getMappingQual() ) - maxMappingQual = p.getMappingQual(); - } - } - // if we've stored the max mapping qual value in the genotype get it there - if (maxMappingQual == 0 && genotype.hasAttribute(MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY)) - maxMappingQual = (double)genotype.getAttributeAsInt(MAXIMUM_MAPPING_QUALITY_ATTRIBUTE_KEY); - // if we've stored the read count value in the genotype get it there - if (readCount == 0 && genotype.hasAttribute(READ_COUNT_ATTRIBUTE_KEY)) - readCount = genotype.getAttributeAsInt(READ_COUNT_ATTRIBUTE_KEY); - - ArrayList alleles = new ArrayList(); - for ( Allele a : genotype.getAlleles() ) - alleles.add(a.toString().charAt(0)); - Collections.sort(alleles); - StringBuffer sb = new StringBuffer(); - for ( Character base : alleles ) - sb.append(base); - - mWriter.println(String.format("%s %16d %c %8d %.0f %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", - vc.getChr(), - vc.getStart(), - ref, - readCount, - maxMappingQual, - sb.toString(), - nextVrsRef, - nextVrsBest, - posteriors[0], - posteriors[1], - posteriors[2], - posteriors[3], - posteriors[4], - posteriors[5], - posteriors[6], - posteriors[7], - posteriors[8], - posteriors[9])); - mWriter.flush(); // necessary so that writing to an output stream will work + public void add(VariantContext vc, byte refBase) { + throw new UnsupportedOperationException("We no longer support writing Geli"); } public void addGenotypeLikelihoods(GenotypeLikelihoods gl) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 44d309921..1364708e5 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -3,17 +3,9 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BlockCompressedOutputStream; -import org.broad.tribble.util.variantcontext.Genotype; -import org.broad.tribble.util.variantcontext.MutableGenotype; import org.broad.tribble.util.variantcontext.VariantContext; -import org.broad.tribble.vcf.VCFConstants; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.genotype.CalledGenotype; import org.broadinstitute.sting.utils.genotype.IndelLikelihood; import org.broadinstitute.sting.utils.genotype.LikelihoodObject; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.DataOutputStream; import java.io.File; @@ -151,67 +143,8 @@ public class GLFWriter implements GLFGenotypeWriter { * @param vc the variant context representing the call to add * @param refBase not used by this writer */ - public void addCall(VariantContext vc, byte refBase) { - if ( headerText == null ) - throw new IllegalStateException("The GLF Header must be written before calls can be added"); - - - char ref = vc.getReference().toString().charAt(0); - if ( vc.getNSamples() != 1 ) - throw new IllegalArgumentException("The GLF format does not support multi-sample or no-calls"); - - Genotype genotype = vc.getGenotypes().values().iterator().next(); - if ( genotype.isNoCall() ) - throw new IllegalArgumentException("The GLF format does not support no-calls"); - - ReadBackedPileup pileup; - double[] likelihoods; - if ( genotype instanceof CalledGenotype) { - pileup = ((CalledGenotype)genotype).getReadBackedPileup(); - likelihoods = ((CalledGenotype)genotype).getLikelihoods(); - } else { - pileup = (ReadBackedPileup)genotype.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY); - likelihoods = (double[])genotype.getAttribute(CalledGenotype.LIKELIHOODS_ATTRIBUTE_KEY); - } - - if ( likelihoods == null ) - throw new IllegalArgumentException("The GLF format requires likelihoods"); - LikelihoodObject obj = new LikelihoodObject(likelihoods, LikelihoodObject.LIKELIHOOD_TYPE.LOG); - obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG); // transform! ... to negitive log likelihoods - - // calculate the RMS mapping qualities and the read depth - double rms = 0.0; - int readCount = 0; - - if ( pileup != null) { - rms = calculateRMS(pileup); - readCount = pileup.size(); - } - // if we can't get the rms from the read pile-up (preferred), check the tags, the VC might have it - if (genotype.hasAttribute(RMS_MAPPING_QUAL) && new Double(0.0).equals(rms)) - rms = (Double)((MutableGenotype)genotype).getAttribute(RMS_MAPPING_QUAL); - - // if we can't get the depth from the read pile-up (preferred), check the tags, the VC might have it - if (genotype.hasAttribute(VCFConstants.DEPTH_KEY) && 0 == readCount) - readCount = (Integer)((MutableGenotype)genotype).getAttribute(VCFConstants.DEPTH_KEY); - - addCall(GenomeLocParser.getContigInfo(vc.getChr()), vc.getStart(), (float) rms, ref, readCount, obj); - } - - - /** - * calculate the rms , given the read pileup - * - * @param pileup the pileup - * - * @return the rms of the read mapping qualities - */ - private double calculateRMS(ReadBackedPileup pileup) { - int[] qualities = new int[pileup.size()]; - int index = 0; - for (PileupElement p : pileup ) - qualities[index++] = p.getMappingQual(); - return MathUtils.rms(qualities); + public void add(VariantContext vc, byte refBase) { + throw new UnsupportedOperationException("We no longer support writing GLF"); } /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java deleted file mode 100644 index 8bab9b155..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.genotype.vcf; - -import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; -import org.broadinstitute.sting.utils.StingException; -import org.broad.tribble.vcf.VCFHeaderLine; -import org.broad.tribble.vcf.VCFHeader; -import org.broad.tribble.vcf.VCFFormatHeaderLine; -import org.broad.tribble.vcf.VCFCodec; -import org.broad.tribble.source.BasicFeatureSource; -import org.broad.tribble.index.Index; - -import java.io.*; -import java.util.HashSet; -import java.util.Iterator; - - -/** - * @author ebanks - *

- * Class VCFGenotypeWriterAdapter - *

- * Adapt the VCF writter to the genotype output system - */ -public class VCFGenotypeWriterAdapter extends VCFWriter implements VCFGenotypeWriter { - - // allowed genotype format strings - private HashSet allowedGenotypeFormatKeys = null; - - public VCFGenotypeWriterAdapter(File writeTo) { - super(writeTo); - } - - public VCFGenotypeWriterAdapter(OutputStream writeTo) { - super(writeTo); - } - - public void addCall(VariantContext vc, byte ref) { - vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatKeys); - add(vc, ref); - } - - public void writeHeader(VCFHeader header) { - for ( VCFHeaderLine field : header.getMetaData() ) { - if ( field instanceof VCFFormatHeaderLine ) { - if ( allowedGenotypeFormatKeys == null ) - allowedGenotypeFormatKeys = new HashSet(); - allowedGenotypeFormatKeys.add(((VCFFormatHeaderLine)field).getName()); - } - } - super.writeHeader(header); - } - - public void append(File file) { - // if we don't need to restrict the FORMAT fields, then read blindly - if ( allowedGenotypeFormatKeys == null ) - blindAppend(file); - else - smartAppend(file); - } - - private void blindAppend(File file) { - try { - BufferedReader reader = new BufferedReader(new FileReader(file)); - String line = reader.readLine(); - while ( line != null ) { - if ( !VCFHeaderLine.isHeaderLine(line) ) { - mWriter.write(line); - mWriter.write("\n"); - } - line = reader.readLine(); - } - - reader.close(); - } catch (IOException e) { - throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e); - } - } - - private void smartAppend(File file) { - try { - VCFCodec codec = new VCFCodec(); - Index index = TribbleRMDTrackBuilder.loadIndex(file, codec, false); - BasicFeatureSource vcfReader = new BasicFeatureSource(file.getAbsolutePath(),index,codec); - Iterator iterator = vcfReader.iterator(); - while ( iterator.hasNext() ) { - VariantContext vc = iterator.next(); - vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatKeys); - add(vc, vc.getReferenceBaseForIndel()); - } - vcfReader.close(); - } catch (FileNotFoundException e) { - throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e); - } catch (IOException e) { - throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e); - } - - - - } -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index a2cbcf2aa..de75cb94a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -10,8 +10,6 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.CalledGenotype; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import java.io.*; import java.util.*; @@ -305,10 +303,6 @@ public class VCFWriter { else { val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL)); } - } else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) { - ReadBackedPileup pileup = (ReadBackedPileup)g.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY); - if ( pileup != null ) - val = pileup.size(); } else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) { val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : (g.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED); } diff --git a/java/src/org/broadinstitute/sting/utils/vcf/GATKVCFWriter.java b/java/src/org/broadinstitute/sting/utils/vcf/GATKVCFWriter.java new file mode 100644 index 000000000..71c4072c7 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/vcf/GATKVCFWriter.java @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.vcf; + +import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.genotype.vcf.*; +import org.broad.tribble.vcf.*; + +import java.io.*; + + +/** + * @author ebanks + *

+ * Class GATKVCFWriter + *

+ * GATK-specific version of the VCF Writer + */ +public class GATKVCFWriter extends VCFWriter implements VCFGenotypeWriter { + + public GATKVCFWriter(File writeTo) { + super(writeTo); + } + + public GATKVCFWriter(OutputStream writeTo) { + super(writeTo); + } + + public void writeHeader(VCFHeader header) { + // TODO -- put the command-line generating code for the header right here + super.writeHeader(header); + } + + public void append(File file) { + try { + BufferedReader reader = new BufferedReader(new FileReader(file)); + String line = reader.readLine(); + while ( line != null ) { + if ( !VCFHeaderLine.isHeaderLine(line) ) { + mWriter.write(line); + mWriter.write("\n"); + } + line = reader.readLine(); + } + + reader.close(); + } catch (IOException e) { + throw new StingException("Error reading file " + file + " in GATKVCFWriter: ", e); + } + } +} diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java deleted file mode 100644 index 565ce5333..000000000 --- a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java +++ /dev/null @@ -1,214 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata; - -import edu.mit.broad.picard.genotype.geli.GeliFileReader; -import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; -import net.sf.samtools.util.CloseableIterator; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import org.broad.tribble.gelitext.GeliTextCodec; -import org.broad.tribble.gelitext.GeliTextFeature; -import org.broad.tribble.readers.AsciiLineReader; -import org.broad.tribble.util.variantcontext.VariantContext; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; -import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; -import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - - -/** - * - * @author aaron - * - * Class VariantContextAdaptorsUnitTest - * - * This test class exists to test input -> output of variant formats - * run through the VariantContext class. - */ -public class VariantContextAdaptorsUnitTest extends BaseTest { - public static IndexedFastaSequenceFile seq = null; - - @BeforeClass - public static void beforeClass() { - seq = new IndexedFastaSequenceFile(new File(b36KGReference)); - GenomeLocParser.setupRefContigOrdering(seq); - } - - /** - * this test takes a known Geli file, reads in the records (storing them into an array), - * and creates VariantContext records. These VC records are then outputted through a genotype writer, - * and then read back in off of disk and compared to the original records. This way we are positive all - * the information that encodes a Geli makes it into the VC and then out to disk. - */ - @Test - public void testVariantContextGeliTextToGeliText() { - - // our input and output files - File knownFile = new File(validationDataLocation + "/well_formed.geli"); // our known good geli - File tempFile = new File("temp.geli"); // our temporary geli output -> input file - tempFile.deleteOnExit(); // delete when we're done - - // create our genotype writer for GLFs - GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI,tempFile); - ((GeliTextWriter)gw).writeHeader(null); // the write header command ignores the parameter - - GeliTextFeature geliText; - GeliTextCodec codec = new GeliTextCodec(); - // buffer the records we see - List records = new ArrayList(); - - // a little more complicated than the GLF example, we have to read the file in - AsciiLineReader reader = createReader(knownFile); - - // get the first real line (non-header) - String line = cleanHeaderFromFile(reader); - - // while we have records, make a Variant Context and output it to a GLF file - while (line != null && line != "") { - geliText = (GeliTextFeature)codec.decode(line); - records.add(geliText); // we know they're all single calls in the reference file - VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText, null); - if (vc != null) gw.addCall(vc,(byte)' '); - line = readLine(reader); - } - gw.close(); // close the file - reader.close(); - - // buffer the new records we see - List records2 = new ArrayList(); - - reader = createReader(tempFile); - // get the first real line (non-header) - line = cleanHeaderFromFile(reader); - - // while we have records, make a Variant Context and output it to a GLF file - while (line != null && line != "") { - geliText = (GeliTextFeature)codec.decode(line); - records2.add(geliText); // we know they're all single calls in the reference file - line = readLine(reader); - } - gw.close(); // close the file - reader.close(); - // compare sizes - Assert.assertEquals("The input Geli file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size()); - // now compare each record - for (int x = 0; x < records.size(); x++) - if(!records.get(x).equals(records2.get(x))) { - System.err.println("Record 1 :" + records.get(x).toString()); - System.err.println("Record 2 :" + records2.get(x).toString()); - Assert.fail("Geli Text Records were not preserved when cycling them to and from disc"); - } - } - - /** - * this test takes a known GeliBinary file, reads in the records (storing them into an array), - * and creates VariantContext records. These VC records are then outputted through a genotype writer, - * and then read back in off of disk and compared to the original records. This way we are positive all - * the information that encodes a Geli makes it into the VC and then out to disk. - */ - @Test - public void testVariantContextGeliBinaryToGeliBinary() { - - // our input and output files - File knownFile = new File(validationDataLocation + "large_test_geli_binary.geli"); // our known good geli - File tempFile = new File("temp_binary.geli"); // our temporary geli output -> input file - tempFile.deleteOnExit(); // delete when we're done - - // create our genotype writer for GLFs - GenotypeWriter gw = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GELI_BINARY,tempFile); - - // buffer the records we see - List records = new ArrayList(); - - // a little more complicated than the GLF example, we have to read the file in - GeliFileReader reader = new GeliFileReader(knownFile); - - ((GeliGenotypeWriter)gw).writeHeader(reader.getFileHeader()); // the write header command ignores the parameter - - CloseableIterator iterator = reader.iterator(); - // while we have records, make a Variant Context and output it to a GeliBinary file - while (iterator.hasNext()) { - rodGELI gel = new rodGELI("myROD",iterator.next()); - records.add(gel); - VariantContext vc = VariantContextAdaptors.toVariantContext("myROD",gel, null); - if (vc != null) gw.addCall(vc,(byte)' '); - } - iterator.close(); - gw.close(); // close the file - reader.close(); - - // buffer the new records we see - List records2 = new ArrayList(); - - reader = new GeliFileReader(tempFile); - iterator = reader.iterator(); - while (iterator.hasNext()) { - rodGELI gel = new rodGELI("myROD",iterator.next()); - records2.add(gel); - } - iterator.close(); - reader.close(); - // compare sizes - Assert.assertEquals("The input GeliBinary file doesn't contain the same number of records as we saw in the first file", records.size(),records2.size()); - // now compare each record - for (int x = 0; x < records.size(); x++) { - if(!records.get(x).getGeliRecord().equals(records2.get(x).getGeliRecord())) { - Assert.fail("GeliBinary Records were not preserved when cycling them to and from disc"); - System.err.println("Record 1 :" + records.get(x).toString()); - System.err.println("Record 2 :" + records2.get(x).toString()); - - } - } - } - - /** - * clean header lines form a geli file - * @param reader the reader - * @return the first line that's not a header - */ - private String cleanHeaderFromFile(AsciiLineReader reader) { - String line = "#"; - while (line != null && line.startsWith("#")) - line = readLine(reader); - return line; - } - - /** - * create a reader, given a file and the reader object - * @param knownFile the file to read in - * @return AsciiLineReader the ascii line reader - */ - private AsciiLineReader createReader(File knownFile) { - AsciiLineReader reader = null; - try { - reader = new AsciiLineReader(new FileInputStream(knownFile)); - } catch (FileNotFoundException e) { - Assert.fail("File not found: " + knownFile); - } - return reader; - } - - /** - * read a line from the specified reader. A method to make the above code look cleaner - * @param reader the ascii line reader - * @return a line of text - */ - public String readLine(AsciiLineReader reader) { - try { - String line = reader.readLine(); - return line; - } catch (IOException e) { - return null; - } - } -} diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 8a423c2e0..7bda228bf 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -106,30 +106,6 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { executeTest("testConfidence2", spec2); } - // -------------------------------------------------------------------------------------------------------------- - // - // testing other output formats - // - // -------------------------------------------------------------------------------------------------------------- - - @Test - public void testOtherFormat() { - HashMap e = new HashMap(); - e.put( "GLF", "b3d463eb0b7e59604296747e1eb7103c" ); - e.put( "GELI_BINARY", "764a0fed1b3cf089230fd91f3be9c2df" ); - - for ( Map.Entry entry : e.entrySet() ) { - WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -vf " + entry.getKey(), 1, - Arrays.asList(entry.getValue())); - executeTest(String.format("testOtherFormat[%s]", entry.getKey()), spec); - } - } - - // --------------------------------------------- // - // ALL REMAINING TESTS ARE OUTPUT IN GELI FORMAT // - // --------------------------------------------- // - // -------------------------------------------------------------------------------------------------------------- // // testing heterozygosity @@ -138,12 +114,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "ee390f91867e8729b96220115e56ddb3" ); - e.put( 1.0 / 1850, "f96ad0ed71449bdb16b0c5561303a05a" ); + e.put( 0.01, "79be222f9c5fa7091723b8172c236a22" ); + e.put( 1.0 / 1850, "e217d213a7ebaf9c4bd41f79234055cc" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + " -vf GELI -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec); } @@ -158,12 +134,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOtherBaseCallModel() { HashMap e = new HashMap(); - e.put( "one_state", "bcc983210b576d9fd228a67c5b9f372a" ); - e.put( "three_state", "2db3a5f3d46e13e2f44c34fbb7e7936f" ); + e.put( "one_state", "8e7a18b71c08d655df15b3e5204ae924" ); + e.put( "three_state", "eacf6c82bf81d20f3fc4a74051869375" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - baseCommand + " -vf GELI -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec); } @@ -180,10 +156,9 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" + " -varout %s" + - " -L 1:10,000,000-10,100,000" + - " -vf GELI", + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("f67c690bf2e4eee2bb7c58b6646a2a98")); + Arrays.asList("99102fb35f0f33f582da8dbde64d80cd")); executeTest(String.format("testMultiTechnologies"), spec); }