Merged bug fix from Stable into Unstable

This commit is contained in:
Mark DePristo 2012-07-25 09:01:21 -04:00
commit f5f61d28b7
8 changed files with 75 additions and 49 deletions

View File

@ -646,7 +646,7 @@
<jar jarfile="${dist.dir}/vcf.jar">
<fileset dir="${java.classes}">
<include name="org/broadinstitute/sting/utils/codecs/vcf/**/*.class"/>
<!-- <include name="org/broadinstitute/sting/utils/codecs/bcf2/**/*.class"/> -->
<include name="org/broadinstitute/sting/utils/codecs/bcf2/**/*.class"/>
<include name="org/broadinstitute/sting/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>

View File

@ -333,7 +333,11 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
filters.add(exp.name);
}
}
builder.filters(filters);
if ( filters.isEmpty() )
builder.passFilters();
else
builder.filters(filters);
writer.add(builder.make());
}

View File

@ -26,15 +26,12 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureCodecHeader;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -44,12 +41,15 @@ import java.io.ByteArrayInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Decode BCF2 files
*/
public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDependentFeatureCodec {
public final class BCF2Codec implements FeatureCodec<VariantContext> {
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
private final static boolean FORBID_SYMBOLICS = false;
@ -162,7 +162,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
contigNames.add(contig.getID());
}
} else {
logger.info("Didn't find any contig lines in BCF2 file, falling back (dangerously) to GATK reference dictionary");
throw new UserException.MalformedBCF2("Didn't find any contig lines in BCF2 file header");
}
// create the string dictionary
@ -201,19 +201,6 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
}
}
// --------------------------------------------------------------------------------
//
// Reference dependence
//
// --------------------------------------------------------------------------------
@Override
public void setGenomeLocParser(final GenomeLocParser genomeLocParser) {
// initialize contigNames to standard ones in reference
for ( final SAMSequenceRecord contig : genomeLocParser.getContigs().getSequences() )
contigNames.add(contig.getSequenceName());
}
// --------------------------------------------------------------------------------
//
// implicit block

View File

@ -160,7 +160,7 @@ public class BCF2FieldWriterManager {
/**
* Get a site writer specialized to encode values for site info field
* @param field key found in the VCF header INFO records
* @return
* @return non-null writer if one can be found, or null if none exists for field
*/
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
return getWriter(field, siteWriters);
@ -169,17 +169,14 @@ public class BCF2FieldWriterManager {
/**
* Get a genotypes writer specialized to encode values for genotypes field
* @param field key found in the VCF header FORMAT records
* @return
* @return non-null writer if one can be found, or null if none exists for field
*/
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
return getWriter(field, genotypesWriters);
}
@Requires({"map != null", "key != null"})
@Ensures("result != null")
public <T> T getWriter(final String key, final Map<String, T> map) {
final T writer = map.get(key);
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);
return writer;
return map.get(key);
}
}

View File

@ -83,6 +83,14 @@ import java.util.*;
* @since 06/12
*/
class BCF2Writer extends IndexingVariantContextWriter {
/**
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
* is found in the input VC. This can be very dangerous as the genotype encoding
* depends on the exact ordering of the header.
*
* TODO -- enable when the new smart VCF header code is created by Eric Banks
*/
private final static boolean WRITE_UNDECODED_GENOTYPE_BLOCK = false;
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
@ -237,9 +245,11 @@ class BCF2Writer extends IndexingVariantContextWriter {
private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
if ( vc.getGenotypes().isLazyWithData() ) {
LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData )
LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
if ( WRITE_UNDECODED_GENOTYPE_BLOCK && lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData )
return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
else
lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long
}
return null;
@ -278,6 +288,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
private void buildFilter( VariantContext vc ) throws IOException {
if ( vc.isFiltered() ) {
encodeStringsByRef(vc.getFilters());
} else if ( vc.filtersWereApplied() ) {
encodeStringsByRef(Collections.singleton(VCFConstants.PASSES_FILTERS_v4));
} else {
encoder.encodeTypedMissing(BCF2Type.INT8);
}
@ -285,8 +297,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
private void buildInfo( VariantContext vc ) throws IOException {
for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
final String key = infoFieldEntry.getKey();
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key);
final String field = infoFieldEntry.getKey();
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(field);
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "INFO");
writer.start(encoder, vc);
writer.site(encoder, vc);
writer.done(encoder, vc);
@ -294,26 +307,40 @@ class BCF2Writer extends IndexingVariantContextWriter {
}
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
final BCF2Codec.LazyData lazyData = getLazyData(vc);
final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects
if ( lazyData != null ) {
// we never decoded any data from this BCF file, so just pass it back
return lazyData.bytes;
} else {
// we have to do work to convert the VC into a BCF2 byte stream
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
for ( final String field : genotypeFields ) {
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
writer.start(encoder, vc);
for ( final String name : sampleNames ) {
Genotype g = vc.getGenotype(name);
if ( g == null ) VCFWriter.missingSampleError(vc, header);
writer.addGenotype(encoder, vc, g);
}
writer.done(encoder, vc);
}
return encoder.getRecordBytes();
}
// we have to do work to convert the VC into a BCF2 byte stream
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
for ( final String field : genotypeFields ) {
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT");
writer.start(encoder, vc);
for ( final String name : sampleNames ) {
Genotype g = vc.getGenotype(name);
if ( g == null ) VCFWriter.missingSampleError(vc, header);
writer.addGenotype(encoder, vc, g);
}
writer.done(encoder, vc);
}
return encoder.getRecordBytes();
}
/**
* Throws a meaningful error message when a field (INFO or FORMAT) is found when writing out a file
* but there's no header line for it.
*
* @param vc
* @param field
* @param fieldType
*/
private final void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) {
throw new UserException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " +
vc.getChr() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader");
}
// --------------------------------------------------------------------------------

View File

@ -99,4 +99,13 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
Arrays.asList("8077eb3bab5ff98f12085eb04176fdc9"));
executeTest("test deletions", spec);
}
@Test
public void testUnfilteredBecomesFilteredAndPass() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
+ " --filterExpression 'FS > 60.0' --filterName SNP_FS -V " + privateTestDir + "unfilteredForFiltering.vcf", 1,
Arrays.asList("8ed32a2272bab8043a255362335395ef"));
executeTest("testUnfilteredBecomesFilteredAndPass", spec);
}
}

View File

@ -50,12 +50,12 @@ public class VCFJarClassLoadingUnitTest {
ClassLoader classLoader = new URLClassLoader(jarURLs, null);
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext");
// TODO -- uncomment when we include BCF2 codec
// classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec");
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter");
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter");
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.BCF2Writer");
}
/**

View File

@ -740,6 +740,8 @@ public class VariantContextTestProvider {
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered");
BaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());