Merged bug fix from Stable into Unstable
This commit is contained in:
commit
f5f61d28b7
|
|
@ -646,7 +646,7 @@
|
|||
<jar jarfile="${dist.dir}/vcf.jar">
|
||||
<fileset dir="${java.classes}">
|
||||
<include name="org/broadinstitute/sting/utils/codecs/vcf/**/*.class"/>
|
||||
<!-- <include name="org/broadinstitute/sting/utils/codecs/bcf2/**/*.class"/> -->
|
||||
<include name="org/broadinstitute/sting/utils/codecs/bcf2/**/*.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/variantcontext/**/*.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
|
||||
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
|
||||
|
|
|
|||
|
|
@ -333,7 +333,11 @@ public class VariantFiltration extends RodWalker<Integer, Integer> {
|
|||
filters.add(exp.name);
|
||||
}
|
||||
}
|
||||
builder.filters(filters);
|
||||
|
||||
if ( filters.isEmpty() )
|
||||
builder.passFilters();
|
||||
else
|
||||
builder.filters(filters);
|
||||
|
||||
writer.add(builder.make());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,15 +26,12 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
|
|||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -44,12 +41,15 @@ import java.io.ByteArrayInputStream;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Decode BCF2 files
|
||||
*/
|
||||
public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceDependentFeatureCodec {
|
||||
public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
||||
private final static boolean FORBID_SYMBOLICS = false;
|
||||
|
||||
|
|
@ -162,7 +162,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
contigNames.add(contig.getID());
|
||||
}
|
||||
} else {
|
||||
logger.info("Didn't find any contig lines in BCF2 file, falling back (dangerously) to GATK reference dictionary");
|
||||
throw new UserException.MalformedBCF2("Didn't find any contig lines in BCF2 file header");
|
||||
}
|
||||
|
||||
// create the string dictionary
|
||||
|
|
@ -201,19 +201,6 @@ public final class BCF2Codec implements FeatureCodec<VariantContext>, ReferenceD
|
|||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Reference dependence
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void setGenomeLocParser(final GenomeLocParser genomeLocParser) {
|
||||
// initialize contigNames to standard ones in reference
|
||||
for ( final SAMSequenceRecord contig : genomeLocParser.getContigs().getSequences() )
|
||||
contigNames.add(contig.getSequenceName());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// implicit block
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ public class BCF2FieldWriterManager {
|
|||
/**
|
||||
* Get a site writer specialized to encode values for site info field
|
||||
* @param field key found in the VCF header INFO records
|
||||
* @return
|
||||
* @return non-null writer if one can be found, or null if none exists for field
|
||||
*/
|
||||
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
|
||||
return getWriter(field, siteWriters);
|
||||
|
|
@ -169,17 +169,14 @@ public class BCF2FieldWriterManager {
|
|||
/**
|
||||
* Get a genotypes writer specialized to encode values for genotypes field
|
||||
* @param field key found in the VCF header FORMAT records
|
||||
* @return
|
||||
* @return non-null writer if one can be found, or null if none exists for field
|
||||
*/
|
||||
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
|
||||
return getWriter(field, genotypesWriters);
|
||||
}
|
||||
|
||||
@Requires({"map != null", "key != null"})
|
||||
@Ensures("result != null")
|
||||
public <T> T getWriter(final String key, final Map<String, T> map) {
|
||||
final T writer = map.get(key);
|
||||
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);
|
||||
return writer;
|
||||
return map.get(key);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -83,6 +83,14 @@ import java.util.*;
|
|||
* @since 06/12
|
||||
*/
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
/**
|
||||
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
|
||||
* is found in the input VC. This can be very dangerous as the genotype encoding
|
||||
* depends on the exact ordering of the header.
|
||||
*
|
||||
* TODO -- enable when the new smart VCF header code is created by Eric Banks
|
||||
*/
|
||||
private final static boolean WRITE_UNDECODED_GENOTYPE_BLOCK = false;
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
final private static boolean ALLOW_MISSING_CONTIG_LINES = false;
|
||||
|
||||
|
|
@ -237,9 +245,11 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
private BCF2Codec.LazyData getLazyData(final VariantContext vc) {
|
||||
if ( vc.getGenotypes().isLazyWithData() ) {
|
||||
LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
|
||||
if ( lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData )
|
||||
LazyGenotypesContext lgc = (LazyGenotypesContext)vc.getGenotypes();
|
||||
if ( WRITE_UNDECODED_GENOTYPE_BLOCK && lgc.getUnparsedGenotypeData() instanceof BCF2Codec.LazyData )
|
||||
return (BCF2Codec.LazyData)lgc.getUnparsedGenotypeData();
|
||||
else
|
||||
lgc.decode(); // WARNING -- required to avoid keeping around bad lazy data for too long
|
||||
}
|
||||
|
||||
return null;
|
||||
|
|
@ -278,6 +288,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
private void buildFilter( VariantContext vc ) throws IOException {
|
||||
if ( vc.isFiltered() ) {
|
||||
encodeStringsByRef(vc.getFilters());
|
||||
} else if ( vc.filtersWereApplied() ) {
|
||||
encodeStringsByRef(Collections.singleton(VCFConstants.PASSES_FILTERS_v4));
|
||||
} else {
|
||||
encoder.encodeTypedMissing(BCF2Type.INT8);
|
||||
}
|
||||
|
|
@ -285,8 +297,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
private void buildInfo( VariantContext vc ) throws IOException {
|
||||
for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
|
||||
final String key = infoFieldEntry.getKey();
|
||||
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key);
|
||||
final String field = infoFieldEntry.getKey();
|
||||
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(field);
|
||||
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "INFO");
|
||||
writer.start(encoder, vc);
|
||||
writer.site(encoder, vc);
|
||||
writer.done(encoder, vc);
|
||||
|
|
@ -294,26 +307,40 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
}
|
||||
|
||||
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
||||
final BCF2Codec.LazyData lazyData = getLazyData(vc);
|
||||
final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects
|
||||
if ( lazyData != null ) {
|
||||
// we never decoded any data from this BCF file, so just pass it back
|
||||
return lazyData.bytes;
|
||||
} else {
|
||||
// we have to do work to convert the VC into a BCF2 byte stream
|
||||
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
|
||||
for ( final String field : genotypeFields ) {
|
||||
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
|
||||
|
||||
writer.start(encoder, vc);
|
||||
for ( final String name : sampleNames ) {
|
||||
Genotype g = vc.getGenotype(name);
|
||||
if ( g == null ) VCFWriter.missingSampleError(vc, header);
|
||||
writer.addGenotype(encoder, vc, g);
|
||||
}
|
||||
writer.done(encoder, vc);
|
||||
}
|
||||
return encoder.getRecordBytes();
|
||||
}
|
||||
|
||||
// we have to do work to convert the VC into a BCF2 byte stream
|
||||
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
|
||||
for ( final String field : genotypeFields ) {
|
||||
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
|
||||
if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT");
|
||||
|
||||
writer.start(encoder, vc);
|
||||
for ( final String name : sampleNames ) {
|
||||
Genotype g = vc.getGenotype(name);
|
||||
if ( g == null ) VCFWriter.missingSampleError(vc, header);
|
||||
writer.addGenotype(encoder, vc, g);
|
||||
}
|
||||
writer.done(encoder, vc);
|
||||
}
|
||||
return encoder.getRecordBytes();
|
||||
}
|
||||
|
||||
/**
|
||||
* Throws a meaningful error message when a field (INFO or FORMAT) is found when writing out a file
|
||||
* but there's no header line for it.
|
||||
*
|
||||
* @param vc
|
||||
* @param field
|
||||
* @param fieldType
|
||||
*/
|
||||
private final void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) {
|
||||
throw new UserException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " +
|
||||
vc.getChr() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader");
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -99,4 +99,13 @@ public class VariantFiltrationIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("8077eb3bab5ff98f12085eb04176fdc9"));
|
||||
executeTest("test deletions", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testUnfilteredBecomesFilteredAndPass() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantFiltration -o %s --no_cmdline_in_header -R " + b37KGReference
|
||||
+ " --filterExpression 'FS > 60.0' --filterName SNP_FS -V " + privateTestDir + "unfilteredForFiltering.vcf", 1,
|
||||
Arrays.asList("8ed32a2272bab8043a255362335395ef"));
|
||||
executeTest("testUnfilteredBecomesFilteredAndPass", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,12 +50,12 @@ public class VCFJarClassLoadingUnitTest {
|
|||
|
||||
ClassLoader classLoader = new URLClassLoader(jarURLs, null);
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext");
|
||||
// TODO -- uncomment when we include BCF2 codec
|
||||
// classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.VCFWriter");
|
||||
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.writer.BCF2Writer");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -740,6 +740,8 @@ public class VariantContextTestProvider {
|
|||
Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
|
||||
|
||||
assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
|
||||
Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
|
||||
Assert.assertEquals(actual.isFiltered(), expected.isFiltered(), "isFiltered");
|
||||
BaseTest.assertEqualsSet(actual.getFilters(), expected.getFilters(), "filters");
|
||||
BaseTest.assertEqualsDoubleSmart(actual.getPhredScaledQual(), expected.getPhredScaledQual());
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue