Merge pull request #733 from broadinstitute/pd_allow_untrimmed_format

Added -writeFullFormat engine-level argument
This commit is contained in:
Eric Banks 2014-09-17 23:34:09 -04:00
commit 2da9bf7d09
8 changed files with 46 additions and 16 deletions

View File

@ -44,8 +44,8 @@
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
<!-- Version numbers for picard and htsjdk -->
<htsjdk.version>1.118.1556</htsjdk.version>
<picard.version>1.118.1521</picard.version>
<htsjdk.version>1.120.1620</htsjdk.version>
<picard.version>1.120.1579</picard.version>
</properties>
<!-- Dependency configuration (versions, etc.) -->

View File

@ -416,6 +416,18 @@ public class GATKArgumentCollection {
required = false)
public boolean sitesOnlyVCF = false;
/**
* <p>The VCF specification permits missing records to be dropped from the end of FORMAT fields, so long as GT is always output.
* This option prevents GATK from performing that trimming.</p>
*
* <p>For example, given a FORMAT of <code>GT:AD:DP:PL</code>, GATK will by default emit <code>./.</code> for a variant with
* no reads present (i.e., the AD, DP, and PL fields are trimmed). If you specify -writeFullFormat, this record
* would be emitted as <code>./.:.:.:.</code></p>
*/
@Argument(fullName = "never_trim_vcf_format_field", shortName = "writeFullFormat", doc = "Always output all the records in VCF FORMAT fields, even if some are missing",
required = false)
public boolean neverTrimVCFFormatField = false;
@Hidden
@Argument(fullName = "bcf", shortName = "bcf", doc = "Force BCF output, regardless of the file's extension",
required = false)

View File

@ -108,6 +108,11 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
*/
private boolean forceBCF = false;
/**
* Should we write all of the fields in the FORMAT field, even if missing fields could be trimmed?
*/
private boolean writeFullFormatField = false;
/**
* Connects this stub with an external stream capable of serving the
* requests of the consumer of this stub.
@ -153,37 +158,37 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
}
/**
* Retrieves the output stearm to which to (ultimately) write.
* Retrieves the output stream to which to (ultimately) write.
* @return The file. Can be null if genotypeFile is not.
*/
public OutputStream getOutputStream() {
return genotypeStream;
}
/**
* Reports whether this stub's output is currently flagged as compressed.
* @return The current value of the isCompressed flag.
*/
public boolean isCompressed() {
return isCompressed;
}
public void setCompressed(boolean compressed) {
public void setCompressed(final boolean compressed) {
isCompressed = compressed;
}
public void setSkipWritingCommandLineHeader(boolean skipWritingCommandLineHeader) {
public void setSkipWritingCommandLineHeader(final boolean skipWritingCommandLineHeader) {
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
}
public void setDoNotWriteGenotypes(boolean doNotWriteGenotypes) {
public void setDoNotWriteGenotypes(final boolean doNotWriteGenotypes) {
this.doNotWriteGenotypes = doNotWriteGenotypes;
}
public void setForceBCF(boolean forceBCF) {
public void setForceBCF(final boolean forceBCF) {
this.forceBCF = forceBCF;
}
/**
* Sets whether all of the fields in the FORMAT field should be written, even if
* missing fields could be trimmed.
* @param writeFullFormatField true to request the full FORMAT field; false to allow trimming.
*/
public void setWriteFullFormatField(final boolean writeFullFormatField) {
this.writeFullFormatField = writeFullFormatField;
}
/**
* Retrieves the index creator currently held by this stub.
* @return The value of the indexCreator field (NOTE(review): may be null if none was set -- confirm against the setter).
*/
public IndexCreator getIndexCreator() {
return indexCreator;
}
@ -202,11 +207,12 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
}
public EnumSet<Options> getWriterOptions(boolean indexOnTheFly) {
List<Options> options = new ArrayList<Options>();
final List<Options> options = new ArrayList<>();
if ( doNotWriteGenotypes ) options.add(Options.DO_NOT_WRITE_GENOTYPES);
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly) options.add(Options.INDEX_ON_THE_FLY);
if ( writeFullFormatField ) options.add(Options.WRITE_FULL_FORMAT_FIELD);
if ( forceBCF || (getOutputFile() != null && VariantContextWriterFactory.isBCFOutput(getOutputFile())) )
options.add(Options.FORCE_BCF);
@ -235,7 +241,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
setDoNotWriteGenotypes(argumentCollection.sitesOnlyVCF);
setSkipWritingCommandLineHeader(argumentCollection.disableCommandLineInVCF);
setForceBCF(argumentCollection.forceBCFOutput);
setWriteFullFormatField(argumentCollection.neverTrimVCFFormatField);
}
public void writeHeader(VCFHeader header) {

View File

@ -716,9 +716,21 @@ public class EngineFeaturesIntegrationTest extends WalkerTest {
executeTest("testVCFFeatures: "+args, spec);
}
/**
 * Runs SelectVariants over a fixed test VCF with --fullyDecode, optionally adding
 * -writeFullFormat, and hands the given MD5 to the test spec as its single expected value.
 *
 * @param writeFullFormat whether to append the -writeFullFormat engine argument
 * @param md5 the MD5 passed to the WalkerTestSpec for this configuration
 */
private void testVCFFormatHandling(final boolean writeFullFormat, final String md5) {
    // Pick the optional engine flag and the matching label used in the test name.
    final String formatFlag;
    final String label;
    if ( writeFullFormat ) {
        formatFlag = "-writeFullFormat";
        label = "Untrimmed";
    } else {
        formatFlag = "";
        label = "Trimmed";
    }

    final String inputVcf = privateTestDir + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf";

    // --fullyDecode is required here: without it the FORMAT fields are emitted unchanged.
    final String commandLine = "-T SelectVariants -R " + b37KGReference
            + " -V " + inputVcf
            + " --no_cmdline_in_header -o %s "
            + " --fullyDecode "
            + formatFlag;

    final WalkerTestSpec spec = new WalkerTestSpec(commandLine, 1, Arrays.asList(md5));
    executeTest("testVCFFormatHandling: " + label, spec);
}
/**
* Exercises engine-level VCF writer options: the --sites_only and --bcf switches,
* followed by FORMAT field handling with and without -writeFullFormat.
*/
@Test
public void testVCFWriterFeatures() {
// Each call pairs an engine argument with a hash string (presumably the expected output MD5 -- confirm in testVCFFeatures).
testVCFFeatures("--sites_only", "94bf1f2c0946e933515e4322323a5716");
testVCFFeatures("--bcf", "03f2d6988f54a332da48803c78f9c4b3");
// FORMAT handling: first with -writeFullFormat (untrimmed), then without it (trimmed).
testVCFFormatHandling(true, "2b0fa660b0cef4b0f45a10febb453b6c");
testVCFFormatHandling(false, "5960311fdd9ee6db88587efaaf4055a0");
}
}

View File

@ -3,13 +3,13 @@
<modelVersion>4.0.0</modelVersion>
<groupId>picard</groupId>
<artifactId>picard</artifactId>
<version>1.118.1521</version>
<version>1.120.1579</version>
<name>picard</name>
<dependencies>
<dependency>
<groupId>samtools</groupId>
<artifactId>htsjdk</artifactId>
<version>1.118.1556</version>
<version>1.120.1620</version>
</dependency>
<!-- TODO: Picard is using a custom zip with just ant's BZip2 classes. See also: http://www.kohsuke.org/bzip2 -->
<dependency>

View File

@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>samtools</groupId>
<artifactId>htsjdk</artifactId>
<version>1.118.1556</version>
<version>1.120.1620</version>
<name>htsjdk</name>
<dependencies>
<dependency>