Merged bug fix from Stable into Unstable

This commit is contained in:
Mark DePristo 2012-07-25 09:01:21 -04:00
commit f5f61d28b7
8 changed files with 75 additions and 49 deletions

View File

@ -646,7 +646,7 @@
<jar jarfile="${dist.dir}/vcf.jar"> <jar jarfile="${dist.dir}/vcf.jar">
<fileset dir="${java.classes}"> <fileset dir="${java.classes}">
<include name="org/broadinstitute/sting/utils/codecs/vcf/**/*.class"/> <include name="org/broadinstitute/sting/utils/codecs/vcf/**/*.class"/>
<!-- <include name="org/broadinstitute/sting/utils/codecs/bcf2/**/*.class"/> --> <include name="org/broadinstitute/sting/utils/codecs/bcf2/**/*.class"/>
<include name="org/broadinstitute/sting/utils/variantcontext/**/*.class"/> <include name="org/broadinstitute/sting/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**"/> <include name="org/broadinstitute/sting/utils/exceptions/**"/>
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/> <include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>

View File

@ -333,7 +333,11 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
filters.add(exp.name); filters.add(exp.name);
} }
} }
builder.filters(filters);
if ( filters.isEmpty() )
builder.passFilters();
else
builder.filters(filters);
writer.add(builder.make()); writer.add(builder.make());
} }

View File

@ -26,15 +26,12 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureCodecHeader; import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.readers.AsciiLineReader; import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream; import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -44,12 +41,15 @@ import java.io.ByteArrayInputStream;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** /**
* Decode BCF2 files * Decode BCF2 files
*/ */
public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDependentFeatureCodec { public final class BCF2Codec implements FeatureCodec<VariantContext> {
final protected static Logger logger = Logger.getLogger(BCF2Codec.class); final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
private final static boolean FORBID_SYMBOLICS = false; private final static boolean FORBID_SYMBOLICS = false;
@ -162,7 +162,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
contigNames.add(contig.getID()); contigNames.add(contig.getID());
} }
} else { } else {
logger.info("Didn't find any contig lines in BCF2 file, falling back (dangerously) to GATK reference dictionary"); throw new UserException.MalformedBCF2("Didn't find any contig lines in BCF2 file header");
} }
// create the string dictionary // create the string dictionary
@ -201,19 +201,6 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
} }
} }
// --------------------------------------------------------------------------------
//
// Reference dependence
//
// --------------------------------------------------------------------------------
@Override
public void setGenomeLocParser(final GenomeLocParser genomeLocParser) {
// initialize contigNames to standard ones in reference
for ( final SAMSequenceRecord contig : genomeLocParser.getContigs().getSequences() )
contigNames.add(contig.getSequenceName());
}
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------
// //
// implicit block // implicit block

View File

@ -160,7 +160,7 @@ public class BCF2FieldWriterManager {
/** /**
* Get a site writer specialized to encode values for site info field * Get a site writer specialized to encode values for site info field
* @param field key found in the VCF header INFO records * @param field key found in the VCF header INFO records
* @return * @return non-null writer if one can be found, or null if none exists for field
*/ */
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) { public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
return getWriter(field, siteWriters); return getWriter(field, siteWriters);
@ -169,17 +169,14 @@ public class BCF2FieldWriterManager {
/** /**
* Get a genotypes writer specialized to encode values for genotypes field * Get a genotypes writer specialized to encode values for genotypes field
* @param field key found in the VCF header FORMAT records * @param field key found in the VCF header FORMAT records
* @return * @return non-null writer if one can be found, or null if none exists for field
*/ */
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) { public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
return getWriter(field, genotypesWriters); return getWriter(field, genotypesWriters);
} }
@Requires({"map != null", "key != null"}) @Requires({"map != null", "key != null"})
@Ensures("result != null")
public <T> T getWriter(final String key, final Map<String, T> map) { public <T> T getWriter(final String key, final Map<String, T> map) {
final T writer = map.get(key); return map.get(key);
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);
return writer;
} }
} }

View File

@ -83,6 +83,14 @@ import java.util.*;
* @since 06/12 * @since 06/12
*/ */
class BCF2Writer extends IndexingVariantContextWriter { class BCF2Writer extends IndexingVariantContextWriter {
/**
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
* is found in the input VC. This can be very dangerous as the genotype encoding
* depends on the exact ordering of the header.
*
* TODO -- enable when the new smart VCF header code is created by Eric Banks
*/
private final static boolean WRITE_UNDECODED_GENOTYPE_BLOCK = false;
final protected static Logger logger = Logger.getLogger(BCF2Writer.class); final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
final private static boolean ALLOW_MISSING_CONTIG_LINES = false; final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
@ -237,9 +245,11 @@ class BCF2Writer extends IndexingVariantContextWriter {
private BCF2Codec.LazyData getLazyData(final VariantContext vc) { private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
if ( vc.getGenotypes().isLazyWithData() ) { if ( vc.getGenotypes().isLazyWithData() ) {
LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes(); LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData ) if ( WRITE_UNDECODED_GENOTYPE_BLOCK && lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData )
return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData(); return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
else
lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long
} }
return null; return null;
@ -278,6 +288,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
private void buildFilter( VariantContext vc ) throws IOException { private void buildFilter( VariantContext vc ) throws IOException {
if ( vc.isFiltered() ) { if ( vc.isFiltered() ) {
encodeStringsByRef(vc.getFilters()); encodeStringsByRef(vc.getFilters());
} else if ( vc.filtersWereApplied() ) {
encodeStringsByRef(Collections.singleton(VCFConstants.PASSES_FILTERS_v4));
} else { } else {
encoder.encodeTypedMissing(BCF2Type.INT8); encoder.encodeTypedMissing(BCF2Type.INT8);
} }
@ -285,8 +297,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
private void buildInfo( VariantContext vc ) throws IOException { private void buildInfo( VariantContext vc ) throws IOException {
for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) { for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
final String key = infoFieldEntry.getKey(); final String field = infoFieldEntry.getKey();
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key); final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(field);
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "INFO");
writer.start(encoder, vc); writer.start(encoder, vc);
writer.site(encoder, vc); writer.site(encoder, vc);
writer.done(encoder, vc); writer.done(encoder, vc);
@ -294,26 +307,40 @@ class BCF2Writer extends IndexingVariantContextWriter {
} }
private byte[] buildSamplesData(final VariantContext vc) throws IOException { private byte[] buildSamplesData(final VariantContext vc) throws IOException {
final BCF2Codec.LazyData lazyData = getLazyData(vc); final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects
if ( lazyData != null ) { if ( lazyData != null ) {
// we never decoded any data from this BCF file, so just pass it back // we never decoded any data from this BCF file, so just pass it back
return lazyData.bytes; return lazyData.bytes;
} else {
// we have to do work to convert the VC into a BCF2 byte stream
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
for ( final String field : genotypeFields ) {
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
writer.start(encoder, vc);
for ( final String name : sampleNames ) {
Genotype g = vc.getGenotype(name);
if ( g == null ) VCFWriter.missingSampleError(vc, header);
writer.addGenotype(encoder, vc, g);
}
writer.done(encoder, vc);
}
return encoder.getRecordBytes();
} }
// we have to do work to convert the VC into a BCF2 byte stream
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
for ( final String field : genotypeFields ) {
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT");
writer.start(encoder, vc);
for ( final String name : sampleNames ) {
Genotype g = vc.getGenotype(name);
if ( g == null ) VCFWriter.missingSampleError(vc, header);
writer.addGenotype(encoder, vc, g);
}
writer.done(encoder, vc);
}
return encoder.getRecordBytes();
}
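/**
* Throws a UserException with a meaningful message when a field (INFO or FORMAT) is found while
* writing out a file but there's no header line defining it.
*
* @param vc the VariantContext being written; its contig, start position and source are reported in the message
* @param field the key of the INFO or FORMAT field that has no corresponding header line
* @param fieldType the kind of field being reported, either "INFO" or "FORMAT"
*/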
private final void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) {
throw new UserException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " +
vc.getChr() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader");
} }
// -------------------------------------------------------------------------------- // --------------------------------------------------------------------------------

View File

@ -99,4 +99,13 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
Arrays.asList("8077eb3bab5ff98f12085eb04176fdc9")); Arrays.asList("8077eb3bab5ff98f12085eb04176fdc9"));
executeTest("test deletions", spec); executeTest("test deletions", spec);
} }
@Test
public void testUnfilteredBecomesFilteredAndPass() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --filterExpression 'FS > 60.0' --filterName SNP_FS -V " + privateTestDir + "unfilteredForFiltering.vcf", 1,
Arrays.asList("8ed32a2272bab8043a255362335395ef"));
executeTest("testUnfilteredBecomesFilteredAndPass", spec);
}
} }

View File

@ -50,12 +50,12 @@ public class VCFJarClassLoadingUnitTest {
ClassLoader classLoader = new URLClassLoader(jarURLs, null); ClassLoader classLoader = new URLClassLoader(jarURLs, null);
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext"); classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext");
// TODO -- uncomment when we include BCF2 codec classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec");
// classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec"); classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec"); classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec");
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter"); classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter");
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter"); classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter");
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.BCF2Writer");
} }
/** /**

View File

@ -740,6 +740,8 @@ public class VariantContextTestProvider {
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles"); Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
assertAttributesEquals(actual.getAttributes(), expected.getAttributes()); assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered");
BaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters"); BaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual()); BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());