Bugfixes and testdata cleanup

-- Cut down the size of a few large files in public/testdata that were only used in part
-- Move the VCF filename => shadow BCF filename mapping into BCF2Utils.shadowBCF. Fixes a bug in WalkerTest caused by the way this mapping was previously handled
This commit is contained in:
Mark DePristo 2012-05-24 13:26:05 -04:00
parent 57a1ac0888
commit 7280cdf937
5 changed files with 31 additions and 10 deletions

View File

@ -28,6 +28,7 @@ import net.sf.samtools.util.BlockCompressedOutputStream;
import org.apache.log4j.Logger;
import org.broad.tribble.AbstractFeatureReader;
import org.broadinstitute.sting.gatk.io.stubs.VariantContextWriterStub;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -105,7 +106,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
// if the stub says to test BCF, create a secondary BCF writer and a two-way writer that sends each record to both
// TODO -- remove me when argument generateShadowBCF is removed
if ( stub.alsoWriteBCFForTest() && ! VariantContextWriterFactory.isBCFOutput(file, options)) {
final File bcfFile = new File(file.getAbsolutePath().replace(".vcf", ".bcf"));
final File bcfFile = BCF2Utils.shadowBCF(file);
VariantContextWriter bcfWriter = VariantContextWriterFactory.create(bcfFile, stub.getMasterSequenceDictionary(), options);
writer = new TestWriter(writer, bcfWriter);
}

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFIDHeaderLine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
@ -189,4 +190,21 @@ public class BCF2Utils {
/**
 * Tests whether a string begins with a comma, which this method treats as
 * the marker of a "collapsed" string.
 *
 * An empty string has no leading character and is therefore reported as
 * not collapsed, rather than raising a StringIndexOutOfBoundsException.
 *
 * @param s the string to test; must not be null
 * @return true if s is non-empty and its first character is ','
 */
public static final boolean isCollapsedString(final String s) {
    // guard the charAt(0) access so that "" is handled gracefully
    return s.length() > 0 && s.charAt(0) == ',';
}
/**
 * Returns a good name for a shadow BCF file for vcfFile.
 *
 * Only the file name (the last component of the absolute path) is examined
 * and rewritten; parent directories are never modified, even if their names
 * happen to contain ".vcf".
 *
 * foo.vcf => foo.bcf
 * foo.xxx => foo.xxx.bcf
 * /my.vcf.dir/foo.vcf => /my.vcf.dir/foo.bcf
 * /my.vcf.dir/foo.xxx => /my.vcf.dir/foo.xxx.bcf
 *
 * @param vcfFile the file whose shadow BCF name is wanted; must not be null
 * @return a file in the same directory as vcfFile: if the file name contains ".vcf",
 *         every ".vcf" in the name is replaced by ".bcf"; otherwise ".bcf" is appended
 */
public static final File shadowBCF(final File vcfFile) {
    final String path = vcfFile.getAbsolutePath();

    // split the absolute path into "directory prefix" + "file name" so that the
    // substitution below can never touch a directory component
    final int nameStart = path.lastIndexOf(File.separatorChar) + 1;
    final String directoryPrefix = path.substring(0, nameStart);
    final String fileName = path.substring(nameStart);

    if ( fileName.contains(".vcf") )
        return new File(directoryPrefix + fileName.replace(".vcf", ".bcf"));
    else
        return new File( path + ".bcf" );
}
}

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -60,7 +61,7 @@ public class WalkerTest extends BaseTest {
}
public void validateOutputBCFIfPossible(final String name, final File resultFile) {
final File bcfFile = new File(resultFile.getAbsolutePath().replace(".vcf", ".bcf"));
final File bcfFile = BCF2Utils.shadowBCF(resultFile);
if ( bcfFile.exists() ) {
logger.warn("Checking shadow BCF output file " + bcfFile + " against VCF file " + resultFile);
try {

View File

@ -41,16 +41,17 @@ public class FeatureToGATKFeatureIteratorUnitTest extends BaseTest {
@Test
@SuppressWarnings("unchecked")
public void testCloseFilePointers() throws IOException {
final String chr = "20";
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(BaseTest.hg19Reference));
GenomeLocParser parser = new GenomeLocParser(seq);
File file = new File(validationDataLocation + "NA12878.hg19.example1.vcf");
VCFCodec codec = new VCFCodec();
TestFeatureReader reader = new TestFeatureReader(file.getAbsolutePath(), codec);
CheckableCloseableTribbleIterator<Feature> tribbleIterator = reader.query("20", 1, 100000);
CheckableCloseableTribbleIterator<Feature> tribbleIterator = reader.query(chr, 1, 100000);
FeatureToGATKFeatureIterator gatkIterator = new FeatureToGATKFeatureIterator(parser, tribbleIterator, "test");
Assert.assertTrue(gatkIterator.hasNext(), "GATK feature iterator does not have a next value.");
GenomeLoc gatkLocation = gatkIterator.next().getLocation();
Assert.assertEquals(gatkLocation.getContig(), "20", "Instead of chr 20 rod iterator was at location " + gatkLocation);
Assert.assertEquals(gatkLocation.getContig(), chr, "Instead of chr 20 rod iterator was at location " + gatkLocation);
Assert.assertFalse(tribbleIterator.isClosed(), "Tribble iterator is closed but should be still open.");
gatkIterator.close();
Assert.assertTrue(tribbleIterator.isClosed(), "Tribble iterator is open but should be now closed.");

View File

@ -32,22 +32,22 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.codecs.bcf2.*;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.*;
import java.util.*;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
public class VariantContextWritersUnitTest extends BaseTest {