Bugfixes and testdata cleanup

-- Cut down the size of a few large files in public/testdata that were only used in part
-- Move the VCF filename => shadow BCF filename mapping into BCF2Utils.  Fix a bug in WalkerTest caused by the way this mapping was handled previously
This commit is contained in:
Mark DePristo 2012-05-24 13:26:05 -04:00
parent 57a1ac0888
commit 7280cdf937
5 changed files with 31 additions and 10 deletions

View File

@ -28,6 +28,7 @@ import net.sf.samtools.util.BlockCompressedOutputStream;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader; import org.broad.tribble.AbstractFeatureReader;
import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub; import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -105,7 +106,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
// if the stub says to test BCF, create a secondary writer to BCF and an 2 way out writer to send to both // if the stub says to test BCF, create a secondary writer to BCF and an 2 way out writer to send to both
// TODO -- remove me when argument generateShadowBCF is removed // TODO -- remove me when argument generateShadowBCF is removed
if ( stub.alsoWriteBCFForTest() && ! VariantContextWriterFactory.isBCFOutput(file, options)) { if ( stub.alsoWriteBCFForTest() && ! VariantContextWriterFactory.isBCFOutput(file, options)) {
final File bcfFile = new File(file.getAbsolutePath().replace(".vcf", ".bcf")); final File bcfFile = BCF2Utils.shadowBCF(file);
VariantContextWriter bcfWriter = VariantContextWriterFactory.create(bcfFile, stub.getMasterSequenceDictionary(), options); VariantContextWriter bcfWriter = VariantContextWriterFactory.create(bcfFile, stub.getMasterSequenceDictionary(), options);
writer = new TestWriter(writer, bcfWriter); writer = new TestWriter(writer, bcfWriter);
} }

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFIDHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFIDHeaderLine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
@ -189,4 +190,21 @@ public class BCF2Utils {
public static final boolean isCollapsedString(final String s) { public static final boolean isCollapsedString(final String s) {
return s.charAt(0) == ','; return s.charAt(0) == ',';
} }
/**
 * Returns a good name for a shadow BCF file for vcfFile.
 *
 * The mapping is applied to the file name only, never to the parent
 * directories, so the shadow file always lives next to vcfFile:
 *
 * foo.vcf => foo.bcf
 * foo.xxx => foo.xxx.bcf
 * /dir.vcf/foo.vcf => /dir.vcf/foo.bcf
 *
 * @param vcfFile the file to derive the shadow BCF file name from; must not be null
 * @return a file with an absolute path, in the same directory as vcfFile, whose
 *         name has every ".vcf" replaced by ".bcf", or has ".bcf" appended when
 *         the name contains no ".vcf"
 */
public static final File shadowBCF(final File vcfFile) {
    final File absolute = vcfFile.getAbsoluteFile();
    final String name = absolute.getName();
    if ( name.contains(".vcf") )
        // rewrite only the name component; a ".vcf" inside a directory name must
        // be left alone or the shadow file would point into a different directory
        return new File(absolute.getParentFile(), name.replace(".vcf", ".bcf"));
    else
        return new File( absolute.getPath() + ".bcf" );
}
} }

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -60,7 +61,7 @@ public class WalkerTest extends BaseTest {
} }
public void validateOutputBCFIfPossible(final String name, final File resultFile) { public void validateOutputBCFIfPossible(final String name, final File resultFile) {
final File bcfFile = new File(resultFile.getAbsolutePath().replace(".vcf", ".bcf")); final File bcfFile = BCF2Utils.shadowBCF(resultFile);
if ( bcfFile.exists() ) { if ( bcfFile.exists() ) {
logger.warn("Checking shadow BCF output file " + bcfFile + " against VCF file " + resultFile); logger.warn("Checking shadow BCF output file " + bcfFile + " against VCF file " + resultFile);
try { try {

View File

@ -41,16 +41,17 @@ public class FeatureToGATKFeatureIteratorUnitTest extends BaseTest {
@Test @Test
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public void testCloseFilePointers() throws IOException { public void testCloseFilePointers() throws IOException {
final String chr = "20";
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference)); IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
GenomeLocParser parser = new GenomeLocParser(seq); GenomeLocParser parser = new GenomeLocParser(seq);
File file = new File(validationDataLocation + "NA12878.hg19.example1.vcf"); File file = new File(validationDataLocation + "NA12878.hg19.example1.vcf");
VCFCodec codec = new VCFCodec(); VCFCodec codec = new VCFCodec();
TestFeatureReader reader = new TestFeatureReader(file.getAbsolutePath(), codec); TestFeatureReader reader = new TestFeatureReader(file.getAbsolutePath(), codec);
CheckableCloseableTribbleIterator<Feature> tribbleIterator = reader.query("20", 1, 100000); CheckableCloseableTribbleIterator<Feature> tribbleIterator = reader.query(chr, 1, 100000);
FeatureToGATKFeatureIterator gatkIterator = new FeatureToGATKFeatureIterator(parser, tribbleIterator, "test"); FeatureToGATKFeatureIterator gatkIterator = new FeatureToGATKFeatureIterator(parser, tribbleIterator, "test");
Assert.assertTrue(gatkIterator.hasNext(), "GATK feature iterator does not have a next value."); Assert.assertTrue(gatkIterator.hasNext(), "GATK feature iterator does not have a next value.");
GenomeLoc gatkLocation = gatkIterator.next().getLocation(); GenomeLoc gatkLocation = gatkIterator.next().getLocation();
Assert.assertEquals(gatkLocation.getContig(), "20", "Instead of chr 20 rod iterator was at location " + gatkLocation); Assert.assertEquals(gatkLocation.getContig(), chr, "Instead of chr 20 rod iterator was at location " + gatkLocation);
Assert.assertFalse(tribbleIterator.isClosed(), "Tribble iterator is closed but should be still open."); Assert.assertFalse(tribbleIterator.isClosed(), "Tribble iterator is closed but should be still open.");
gatkIterator.close(); gatkIterator.close();
Assert.assertTrue(tribbleIterator.isClosed(), "Tribble iterator is open but should be now closed."); Assert.assertTrue(tribbleIterator.isClosed(), "Tribble iterator is open but should be now closed.");

View File

@ -32,22 +32,22 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.codecs.bcf2.*; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec; import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider; import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeSuite; import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.io.*; import java.io.File;
import java.util.*; import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
public class VariantContextWritersUnitTest extends BaseTest { public class VariantContextWritersUnitTest extends BaseTest {