High-performance version of the standard VCF writer. New general static Tribble class for common constants, including a general .idx constant and functions to get the standard index name for a given file.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4471 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
6368a46bab
commit
38a67fed63
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.io.storage;
|
||||
|
||||
import org.broadinstitute.sting.gatk.AbstractGenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamStub;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.io.storage;
|
||||
|
||||
import org.broad.tribble.vcf.StandardVCFWriter;
|
||||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broad.tribble.vcf.VCFHeaderLine;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broad.tribble.source.BasicFeatureSource;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub;
|
||||
|
||||
|
|
@ -12,6 +11,7 @@ import java.io.*;
|
|||
|
||||
import net.sf.samtools.util.BlockCompressedOutputStream;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
/**
|
||||
* Provides temporary and permanent storage for genotypes in VCF format.
|
||||
|
|
@ -21,7 +21,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
*/
|
||||
public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||
protected final File file;
|
||||
protected final OutputStream stream;
|
||||
protected OutputStream stream;
|
||||
protected final VCFWriter writer;
|
||||
|
||||
/**
|
||||
|
|
@ -30,29 +30,40 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
* @param stub Stub to use when constructing the output file.
|
||||
*/
|
||||
public VCFWriterStorage( VCFWriterStub stub ) {
|
||||
|
||||
if ( stub.getFile() != null ) {
|
||||
file = stub.getFile();
|
||||
try {
|
||||
if ( stub.isCompressed() )
|
||||
stream = new BlockCompressedOutputStream(file);
|
||||
else
|
||||
stream = new PrintStream(file);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file, "Unable to open target output stream", ex);
|
||||
}
|
||||
this.file = stub.getFile();
|
||||
writer = VCFWriterToFile(stub, stub.getFile());
|
||||
}
|
||||
else if ( stub.getOutputStream() != null ) {
|
||||
this.file = null;
|
||||
this.stream = stub.getOutputStream();
|
||||
writer = new StandardVCFWriter(stream);
|
||||
}
|
||||
else
|
||||
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||
|
||||
writer = new StandardVCFWriter(stream);
|
||||
}
|
||||
|
||||
/**
|
||||
* common initialization routine for multiple constructors
|
||||
* @param stub
|
||||
* @param file
|
||||
* @return A VCF writer for use with this class
|
||||
*/
|
||||
private StandardVCFWriter VCFWriterToFile(VCFWriterStub stub, File file) {
|
||||
try {
|
||||
if ( stub.isCompressed() )
|
||||
stream = new BlockCompressedOutputStream(file);
|
||||
else
|
||||
stream = new PrintStream(file);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file, "Unable to open target output stream", ex);
|
||||
}
|
||||
|
||||
return new StandardVCFWriter(file, this.stream);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructs an object which will redirect into a different file.
|
||||
* @param stub Stub to use when synthesizing file / header info.
|
||||
|
|
@ -60,13 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
*/
|
||||
public VCFWriterStorage(VCFWriterStub stub, File file) {
|
||||
this.file = file;
|
||||
try {
|
||||
this.stream = new PrintStream(file);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new UserException.CouldNotCreateOutputFile(file, "Unable to open target output stream",ex);
|
||||
}
|
||||
writer = new StandardVCFWriter(this.stream);
|
||||
this.writer = VCFWriterToFile(stub, file);
|
||||
writer.writeHeader(stub.getVCFHeader());
|
||||
}
|
||||
|
||||
|
|
@ -94,20 +99,33 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
* Merges the stream backing up this temporary storage into the target.
|
||||
* @param target Target stream for the temporary storage. May not be null.
|
||||
*/
|
||||
// public void mergeInto(VCFWriterStorage target) {
|
||||
// PrintStream formattingTarget = new PrintStream(target.stream);
|
||||
// try {
|
||||
// BufferedReader reader = new BufferedReader(new FileReader(file));
|
||||
// String line = reader.readLine();
|
||||
// while ( line != null ) {
|
||||
// if (!VCFHeaderLine.isHeaderLine(line))
|
||||
// formattingTarget.printf("%s%n",line);
|
||||
// line = reader.readLine();
|
||||
// }
|
||||
//
|
||||
// reader.close();
|
||||
// } catch (IOException e) {
|
||||
// throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
|
||||
// }
|
||||
// }
|
||||
public void mergeInto(VCFWriterStorage target) {
|
||||
PrintStream formattingTarget = new PrintStream(target.stream);
|
||||
try {
|
||||
BufferedReader reader = new BufferedReader(new FileReader(file));
|
||||
String line = reader.readLine();
|
||||
while ( line != null ) {
|
||||
if (!VCFHeaderLine.isHeaderLine(line))
|
||||
formattingTarget.printf("%s%n",line);
|
||||
line = reader.readLine();
|
||||
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec());
|
||||
|
||||
for ( VariantContext vc : source.iterator() ) {
|
||||
target.writer.add(vc, vc.getReferenceBaseForIndel());
|
||||
}
|
||||
|
||||
reader.close();
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -69,9 +69,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
// the input strings we use to create RODs from
|
||||
private final List<RMDTriplet> inputs = new ArrayList<RMDTriplet>();
|
||||
|
||||
// the linear index extension
|
||||
public static final String indexExtension = ".idx";
|
||||
|
||||
private Map<String, Class> classes = null;
|
||||
|
||||
// private sequence dictionary we use to set our tracks with
|
||||
|
|
@ -201,7 +198,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
|
||||
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
|
||||
if (dictFromIndex.size() == 0 && dict != null) {
|
||||
File indexFile = indexFileForFile(inputFile);
|
||||
File indexFile = Tribble.indexFile(inputFile);
|
||||
setIndexSequenceDictionary(index,dict,indexFile,true);
|
||||
dictFromIndex = getSequenceDictionaryFromProperties(index);
|
||||
}
|
||||
|
|
@ -218,10 +215,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
return reader;
|
||||
}
|
||||
|
||||
public static File indexFileForFile(File inputFile) {
|
||||
return new File(inputFile.getAbsoluteFile() + indexExtension);
|
||||
}
|
||||
|
||||
/**
|
||||
* create an index for the input file
|
||||
* @param inputFile the input file
|
||||
|
|
@ -231,7 +224,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
*/
|
||||
public synchronized static Index loadIndex(File inputFile, FeatureCodec codec) throws IOException {
|
||||
// create the index file name, locking on the index file name
|
||||
File indexFile = indexFileForFile(inputFile);
|
||||
File indexFile = Tribble.indexFile(inputFile);
|
||||
FSLockWithShared lock = new FSLockWithShared(indexFile);
|
||||
|
||||
// acquire a lock on the file
|
||||
|
|
|
|||
|
|
@ -26,12 +26,16 @@
|
|||
package org.broadinstitute.sting;
|
||||
|
||||
import junit.framework.Assert;
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParserTestUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.junit.Test;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
|
||||
|
|
@ -139,10 +143,25 @@ public class WalkerTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
public void maybeValidateSupplementaryFile(final String name, final File resultFile) {
|
||||
File indexFile = Tribble.indexFile(resultFile);
|
||||
//System.out.println("Putative index file is " + indexFile);
|
||||
if ( indexFile.exists() ) {
|
||||
if ( resultFile.getAbsolutePath().contains(".vcf") ) {
|
||||
// todo -- currently we only understand VCF files! Blow up since we can't test them
|
||||
throw new StingException("Found an index created for file " + resultFile + " but we can only validate VCF files. Extend this code!");
|
||||
}
|
||||
|
||||
System.out.println("Verifying on-the-fly index " + indexFile + " for test " + name + " using file " + resultFile);
|
||||
Assert.assertTrue(IndexFactory.onDiskIndexEqualToNewlyCreatedIndex(resultFile, indexFile, new VCFCodec()));
|
||||
}
|
||||
}
|
||||
|
||||
public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s) {
|
||||
List<String> md5s = new ArrayList<String>();
|
||||
for (int i = 0; i < resultFiles.size(); i++) {
|
||||
String md5 = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i));
|
||||
maybeValidateSupplementaryFile(name, resultFiles.get(i));
|
||||
md5s.add(i, md5);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.refdata.tracks.builders;
|
|||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
|
|
@ -77,7 +78,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
|||
}
|
||||
// make sure we didn't write the file (check that its timestamp is within bounds)
|
||||
//System.err.println(new File(vcfFile + RMDTrackBuilder.indexExtension).lastModified());
|
||||
Assert.assertTrue(Math.abs(1279591752000l - new File(vcfFile + RMDTrackBuilder.indexExtension).lastModified()) < 100);
|
||||
Assert.assertTrue(Math.abs(1279591752000l - Tribble.indexFile(vcfFile).lastModified()) < 100);
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +87,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testDirIsLockedIndexFromDisk() {
|
||||
File vcfFile = new File(validationDataLocation + "/ROD_validation/read_only/good_index.vcf");
|
||||
File vcfFileIndex = new File(validationDataLocation + "/ROD_validation/read_only/good_index.vcf.idx");
|
||||
File vcfFileIndex = Tribble.indexFile(vcfFile);
|
||||
Index ind = null;
|
||||
try {
|
||||
ind = builder.attemptIndexFromDisk(vcfFile,new VCFCodec(),vcfFileIndex,new FSLockWithShared(vcfFile));
|
||||
|
|
@ -102,7 +103,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testBuilderIndexDirectoryUnwritable() {
|
||||
File vcfFile = new File(validationDataLocation + "/ROD_validation/read_only/no_index.vcf");
|
||||
File vcfFileIndex = new File(validationDataLocation + "/ROD_validation/read_only/no_index.vcf.idx");
|
||||
File vcfFileIndex = Tribble.indexFile(vcfFile);
|
||||
|
||||
Index ind = null;
|
||||
try {
|
||||
|
|
@ -121,7 +122,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testGenerateIndexForUnindexedFile() {
|
||||
File vcfFile = new File(validationDataLocation + "/ROD_validation/always_reindex.vcf");
|
||||
File vcfFileIndex = new File(validationDataLocation + "/ROD_validation/always_reindex.vcf.idx");
|
||||
File vcfFileIndex = Tribble.indexFile(vcfFile);
|
||||
|
||||
// if we can't write to the directory, don't fault the tester, just pass
|
||||
if (!vcfFileIndex.getParentFile().canWrite()) {
|
||||
|
|
@ -147,7 +148,7 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testBuilderIndexSequenceDictionary() {
|
||||
File vcfFile = createCorrectDateIndexFile(new File(validationDataLocation + "/ROD_validation/newerTribbleTrack.vcf"));
|
||||
Long indexTimeStamp = new File(vcfFile.getAbsolutePath() + ".idx").lastModified();
|
||||
Long indexTimeStamp = Tribble.indexFile(vcfFile).lastModified();
|
||||
try {
|
||||
Index idx = builder.loadIndex(vcfFile, new VCFCodec());
|
||||
RMDTrackBuilder.setIndexSequenceDictionary(idx,seq.getSequenceDictionary(),vcfFile,false);
|
||||
|
|
@ -157,11 +158,9 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
|||
e.printStackTrace();
|
||||
Assert.fail("IO exception unexpected" + e.getMessage());
|
||||
}
|
||||
//System.err.println("index : " + new File(vcfFile + ".idx").lastModified());
|
||||
//System.err.println("old : " + indexTimeStamp);
|
||||
|
||||
// make sure that we removed and updated the index
|
||||
Assert.assertTrue("Fail: index file was modified", new File(vcfFile + ".idx").lastModified() == indexTimeStamp);
|
||||
Assert.assertTrue("Fail: index file was modified", Tribble.indexFile(vcfFile).lastModified() == indexTimeStamp);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -184,11 +183,11 @@ public class RMDTrackBuilderUnitTest extends BaseTest {
|
|||
Thread.sleep(2000);
|
||||
|
||||
// create a fake index, before we copy so it's out of date
|
||||
File tmpIndex = new File(tmpFile.getAbsolutePath() + ".idx");
|
||||
File tmpIndex = Tribble.indexFile(tmpFile);
|
||||
tmpIndex.deleteOnExit();
|
||||
|
||||
// copy the vcf (tribble) file to the tmp file location
|
||||
copyFile(new File(tribbleFile + ".idx"), tmpIndex);
|
||||
copyFile(Tribble.indexFile(tribbleFile), tmpIndex);
|
||||
|
||||
return tmpFile;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
|
|
@ -76,7 +77,7 @@ public class VCFWriterUnitTest extends BaseTest {
|
|||
counter++;
|
||||
}
|
||||
Assert.assertEquals(2,counter);
|
||||
new File(fakeVCFFile + RMDTrackBuilder.indexExtension).delete();
|
||||
Tribble.indexFile(fakeVCFFile).delete();
|
||||
fakeVCFFile.delete();
|
||||
}
|
||||
catch (IOException e ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue