diff --git a/build.xml b/build.xml index 7389503bb..ca5d22a5a 100644 --- a/build.xml +++ b/build.xml @@ -646,7 +646,7 @@ - + diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java index 4ce291ca3..1415239b4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java @@ -333,7 +333,11 @@ public class VariantFiltration extends RodWalker { filters.add(exp.name); } } - builder.filters(filters); + + if ( filters.isEmpty() ) + builder.passFilters(); + else + builder.filters(filters); writer.add(builder.make()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 18b4d0b6c..0b9654610 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -26,15 +26,12 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.SAMSequenceRecord; import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodecHeader; import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.PositionalBufferedStream; -import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; -import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -44,12 +41,15 @@ import java.io.ByteArrayInputStream; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * Decode BCF2 files */ -public final class BCF2Codec implements FeatureCodec, ReferenceDependentFeatureCodec { +public final class BCF2Codec implements FeatureCodec { final protected static Logger logger = Logger.getLogger(BCF2Codec.class); private final static boolean FORBID_SYMBOLICS = false; @@ -162,7 +162,7 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD contigNames.add(contig.getID()); } } else { - logger.info("Didn't find any contig lines in BCF2 file, falling back (dangerously) to GATK reference dictionary"); + throw new UserException.MalformedBCF2("Didn't find any contig lines in BCF2 file header"); } // create the string dictionary @@ -201,19 +201,6 @@ public final class BCF2Codec implements FeatureCodec, ReferenceD } } - // -------------------------------------------------------------------------------- - // - // Reference dependence - // - // -------------------------------------------------------------------------------- - - @Override - public void setGenomeLocParser(final GenomeLocParser genomeLocParser) { - // initialize contigNames to standard ones in reference - for ( final SAMSequenceRecord contig : genomeLocParser.getContigs().getSequences() ) - contigNames.add(contig.getSequenceName()); - } - // -------------------------------------------------------------------------------- // // implicit block diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java index 269750c66..219daf315 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java @@ -160,7 +160,7 @@ public class BCF2FieldWriterManager { /** * Get a site writer specialized to encode values for site info field * @param field key found in the VCF header INFO records - * @return + * @return non-null writer if one can be found, or null if none exists for field */ public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) { return getWriter(field, siteWriters); @@ -169,17 +169,14 @@ public class BCF2FieldWriterManager { /** * Get a genotypes writer specialized to encode values for genotypes field * @param field key found in the VCF header FORMAT records - * @return + * @return non-null writer if one can be found, or null if none exists for field */ public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) { return getWriter(field, genotypesWriters); } @Requires({"map != null", "key != null"}) - @Ensures("result != null") public T getWriter(final String key, final Map map) { - final T writer = map.get(key); - if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key); - return writer; + return map.get(key); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index c77fe7172..df2008e8e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -83,6 +83,14 @@ import java.util.*; * @since 06/12 */ class BCF2Writer extends IndexingVariantContextWriter { + /** + * If true, we will write out the undecoded raw bytes for a genotypes block, if it + * is found in the input VC. This can be very dangerous as the genotype encoding + * depends on the exact ordering of the header. + * + * TODO -- enable when the new smart VCF header code is created by Eric Banks + */ + private final static boolean WRITE_UNDECODED_GENOTYPE_BLOCK = false; final protected static Logger logger = Logger.getLogger(BCF2Writer.class); final private static boolean ALLOW_MISSING_CONTIG_LINES = false; @@ -237,9 +245,11 @@ class BCF2Writer extends IndexingVariantContextWriter { private BCF2Codec.LazyData getLazyData(final VariantContext vc) { if ( vc.getGenotypes().isLazyWithData() ) { - LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes(); - if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData ) + LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes(); + if ( WRITE_UNDECODED_GENOTYPE_BLOCK && lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData ) return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData(); + else + lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long } return null; @@ -278,6 +288,8 @@ class BCF2Writer extends IndexingVariantContextWriter { private void buildFilter( VariantContext vc ) throws IOException { if ( vc.isFiltered() ) { encodeStringsByRef(vc.getFilters()); + } else if ( vc.filtersWereApplied() ) { + encodeStringsByRef(Collections.singleton(VCFConstants.PASSES_FILTERS_v4)); } else { encoder.encodeTypedMissing(BCF2Type.INT8); } @@ -285,8 +297,9 @@ class BCF2Writer extends IndexingVariantContextWriter { private void buildInfo( VariantContext vc ) throws IOException { for ( Map.Entry infoFieldEntry : vc.getAttributes().entrySet() ) { - final String key = infoFieldEntry.getKey(); - final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key); + final String field = infoFieldEntry.getKey(); + final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(field); + if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "INFO"); writer.start(encoder, vc); writer.site(encoder, vc); writer.done(encoder, vc); @@ -294,26 +307,40 @@ class BCF2Writer extends IndexingVariantContextWriter { } private byte[] buildSamplesData(final VariantContext vc) throws IOException { - final BCF2Codec.LazyData lazyData = getLazyData(vc); + final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; - } else { - // we have to do work to convert the VC into a BCF2 byte stream - final List genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header); - for ( final String field : genotypeFields ) { - final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); - - writer.start(encoder, vc); - for ( final String name : sampleNames ) { - Genotype g = vc.getGenotype(name); - if ( g == null ) VCFWriter.missingSampleError(vc, header); - writer.addGenotype(encoder, vc, g); - } - writer.done(encoder, vc); - } - return encoder.getRecordBytes(); } + + // we have to do work to convert the VC into a BCF2 byte stream + final List genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header); + for ( final String field : genotypeFields ) { + final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); + if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); + + writer.start(encoder, vc); + for ( final String name : sampleNames ) { + Genotype g = vc.getGenotype(name); + if ( g == null ) VCFWriter.missingSampleError(vc, header); + writer.addGenotype(encoder, vc, g); + } + writer.done(encoder, vc); + } + return encoder.getRecordBytes(); + } + + /** + * Throws a meaningful error message when a field (INFO or FORMAT) is found when writing out a file + * but there's no header line for it. + * + * @param vc + * @param field + * @param fieldType + */ + private final void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) { + throw new UserException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " + + vc.getChr() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader"); } // -------------------------------------------------------------------------------- diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index ae5128c75..97b985a29 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -99,4 +99,13 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { Arrays.asList("8077eb3bab5ff98f12085eb04176fdc9")); executeTest("test deletions", spec); } + + @Test + public void testUnfilteredBecomesFilteredAndPass() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference + + " --filterExpression 'FS > 60.0' --filterName SNP_FS -V " + privateTestDir + "unfilteredForFiltering.vcf", 1, + Arrays.asList("8ed32a2272bab8043a255362335395ef")); + executeTest("testUnfilteredBecomesFilteredAndPass", spec); + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java index fa5476150..50fbea708 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VCFJarClassLoadingUnitTest.java @@ -50,12 +50,12 @@ public class VCFJarClassLoadingUnitTest { ClassLoader classLoader = new URLClassLoader(jarURLs, null); classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext"); -// TODO -- uncomment when we include BCF2 codec -// classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec"); + classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec"); classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec"); classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec"); classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter"); classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter"); + classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.BCF2Writer"); } /** diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index c436a7b45..1a0e8e39d 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -740,6 +740,8 @@ public class VariantContextTestProvider { Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles"); assertAttributesEquals(actual.getAttributes(), expected.getAttributes()); + Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied"); + Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered"); BaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters"); BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());