GATK changes to conform to the Tribble refactoring done as part of improving Tabix support in Tribble (among other things).

1. Enable on-the-fly indexing for vcf.gz.
2. Handle on-the-fly indexing when the file to be indexed is not a regular file, in which case no index should be created.
3. Add method setProgressLogger to all SAMFileWriter implementations.
4. Revved picard to 1.109.1722
5. IndelRealigner md5s change because the MC tag is added to records now.

Fixed up and signed off by ebanks.
This commit is contained in:
Alec Wysoker 2014-02-26 16:59:03 -05:00 committed by Eric Banks
parent 34c697bf12
commit 0369f93b24
19 changed files with 93 additions and 42 deletions

View File

@ -60,8 +60,8 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
private static final String knownIndels = validationDataLocation + "indelRealignerTest.pilot1.ceu.vcf";
private static final String baseCommandPrefix = "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + mainTestBam + " -targetIntervals " + mainTestIntervals + " -compress 0 -L 20:49,500-55,500 ";
private static final String baseCommand = baseCommandPrefix + "-o %s ";
private static final String base_md5 = "a102dd55451799e5f053c784b762087e";
private static final String base_md5_with_SW_or_VCF = "06b8eefcbd785e929027feaa22bb060d";
private static final String base_md5 = "458588d68c8ea7e54443ea722604b265";
private static final String base_md5_with_SW_or_VCF = "d5ed91bd5b2023c69078a0fc00268d3c";
@Test
public void testDefaults() {
@ -84,7 +84,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
WalkerTestSpec spec1 = new WalkerTestSpec(
baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels,
1,
Arrays.asList("1b24b0f2a20aed1adc726d1b296a3192"));
Arrays.asList("a1b9396f4d5b65f7ae6e0062daf363a3"));
executeTest("realigner known indels only from VCF", spec1);
}
@ -101,7 +101,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
public void testLods() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "-LOD 60", base_md5 );
e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "4bf28d3c0337682d439257874377a681" );
e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "dea9bd14323b33348d9cf28e256415f2" );
for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
@ -117,7 +117,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T IndelRealigner -noPG -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10,000,000-11,000,000 -targetIntervals " + validationDataLocation + "indelRealignerTest.NA12878.chrom1.intervals -compress 0 -o %s",
1,
Arrays.asList("f4f6c3b2a2be0306a0ecd3def334bafe"));
Arrays.asList("b91c0bf803247f703dc1cb6ccdc4f18f"));
executeTest("realigner long run", spec);
}
@ -126,7 +126,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseCommand + "--noOriginalAlignmentTags --consensusDeterminationModel USE_SW",
1,
Arrays.asList("71fb521f8febfe2dc683fc636e28ae7d"));
Arrays.asList("041e2254f271261fb46dc3878cf638f6"));
executeTest("realigner no output tags", spec);
}
@ -148,7 +148,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
@Test
public void testMaxReadsInMemory() {
HashMap<String, String> e = new HashMap<String, String>();
e.put( "--maxReadsInMemory 10000", base_md5 );
e.put( "--maxReadsInMemory 10000", "0108cd5950f1a4eb90209c3dca8f9e11" );
e.put( "--maxReadsInMemory 40000", base_md5 );
for ( Map.Entry<String, String> entry : e.entrySet() ) {

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.io.storage;
import net.sf.samtools.*;
import net.sf.samtools.util.CloseableIterator;
import net.sf.samtools.util.ProgressLoggerInterface;
import net.sf.samtools.util.RuntimeIOException;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub;
@ -149,4 +150,8 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
return writer;
}
/**
* Passes the supplied progress logger straight through to the wrapped {@code writer}.
*
* @param logger the progress logger to hand to the underlying writer
*/
@Override
public void setProgressLogger(final ProgressLoggerInterface logger) {
writer.setProgressLogger(logger);
}
}

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.io.stubs;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.ProgressLoggerInterface;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
@ -313,4 +314,12 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
public void close() {
outputTracker.getStorage(this).close();
}
/**
* Always rejects the request: this stub unconditionally throws instead of installing a logger.
*
* @param logger ignored; never stored or used
* @throws java.lang.UnsupportedOperationException No progress logging in this implementation.
*/
@Override
public void setProgressLogger(final ProgressLoggerInterface logger) {
throw new UnsupportedOperationException("Progress logging not supported");
}
}

View File

@ -205,7 +205,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
if ( doNotWriteGenotypes ) options.add(Options.DO_NOT_WRITE_GENOTYPES);
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
if ( indexOnTheFly) options.add(Options.INDEX_ON_THE_FLY);
if ( forceBCF || (getOutputFile() != null && VariantContextWriterFactory.isBCFOutput(getOutputFile())) )
options.add(Options.FORCE_BCF);

View File

@ -29,10 +29,11 @@ import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.MutableIndex;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
@ -70,23 +71,29 @@ public class IndexDictionaryUtils {
* @param dict the sequence dictionary to add contigs to
* @return the filled-in sequence dictionary
*/
static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
LinkedHashSet<String> seqNames = index.getSequenceNames();
static SAMSequenceDictionary createSequenceDictionaryFromContigList(final Index index, final SAMSequenceDictionary dict) {
final List<String> seqNames = index.getSequenceNames();
if (seqNames == null) {
return dict;
}
for (String name : seqNames) {
for (final String name : seqNames) {
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
dict.addSequence(seq);
}
return dict;
}
/**
* Sets the sequence dictionary of the given index. THE INDEX MUST BE MUTABLE (i.e. not Tabix).
*
* @param index the (mutable) index file to use
* @param dict the dictionary to use
*/
public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
for ( SAMSequenceRecord seq : dict.getSequences() ) {
final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
final String length = String.valueOf(seq.getSequenceLength());
index.addProperty(contig,length);
((MutableIndex)index).addProperty(contig, length);
}
}

View File

@ -26,8 +26,8 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.ProgressLoggerInterface;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import java.util.ArrayList;
@ -119,4 +119,12 @@ public class ArtificialStingSAMFileWriter implements StingSAMFileWriter {
@Override
public void setMaxRecordsInRam(int maxRecordsInRam) {
}
/**
* Always rejects the request: this writer unconditionally throws instead of installing a logger.
*
* @param logger ignored; never stored or used
* @throws java.lang.UnsupportedOperationException No progress logging in this implementation.
*/
@Override
public void setProgressLogger(final ProgressLoggerInterface logger) {
throw new UnsupportedOperationException("Progress logging not supported");
}
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.*;
import net.sf.samtools.util.ProgressLoggerInterface;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.utils.Utils;
@ -174,4 +175,11 @@ public class NWaySAMFileWriter implements SAMFileWriter {
public void close() {
for ( SAMFileWriter w : writerMap.values() ) w.close();
}
/**
* Hands the supplied progress logger to every writer currently held in {@code writerMap}.
*
* @param logger the progress logger passed to each underlying writer
*/
@Override
public void setProgressLogger(final ProgressLoggerInterface logger) {
// The same logger instance is shared by all of the per-entry writers.
for (final SAMFileWriter writer: writerMap.values()) {
writer.setProgressLogger(logger);
}
}
}

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.ProgressLoggerInterface;
/**
* XXX
@ -77,4 +78,9 @@ public class SimplifyingSAMFileWriter implements SAMFileWriter {
read.setAttribute("RG", rg);
return read;
}
/**
* Forwards the supplied progress logger to the {@code dest} writer.
*
* @param logger the progress logger to hand to {@code dest}
*/
@Override
public void setProgressLogger(final ProgressLoggerInterface logger) {
dest.setProgressLogger(logger);
}
}

View File

@ -194,14 +194,13 @@ public class GATKVCFUtils {
public static IndexCreator getIndexCreator(GATKVCFIndexType type, int parameter, File outFile) {
IndexCreator idxCreator;
switch (type) {
case DYNAMIC_SEEK: idxCreator = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); break;
case DYNAMIC_SIZE: idxCreator = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SIZE); break;
case LINEAR: idxCreator = new LinearIndexCreator(); break;
case INTERVAL: idxCreator = new IntervalIndexCreator(); break;
case DYNAMIC_SEEK: idxCreator = new DynamicIndexCreator(outFile, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); break;
case DYNAMIC_SIZE: idxCreator = new DynamicIndexCreator(outFile, IndexFactory.IndexBalanceApproach.FOR_SIZE); break;
case LINEAR: idxCreator = new LinearIndexCreator(outFile, parameter); break;
case INTERVAL: idxCreator = new IntervalIndexCreator(outFile, parameter); break;
default: throw new IllegalArgumentException("Unknown IndexCreator type: " + type);
}
idxCreator.initialize(outFile, parameter);
return idxCreator;
}

View File

@ -43,6 +43,7 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.Collections;
import java.util.Set;
@ -94,36 +95,44 @@ public class GATKVCFUtilsUnitTest extends BaseTest {
private final GATKVCFIndexType type;
private final int parameter;
private final Class expectedClass;
private final int expectedDefaultBinSize;
private final int expectedBinSize;
private final Integer expectedDimension;
private final Method dimensionGetter;
private IndexCreatorTest(GATKVCFIndexType type, int parameter, Class expectedClass, int expectedDefaultBinSize, int expectedBinSize) {
private IndexCreatorTest(GATKVCFIndexType type, int parameter, Class expectedClass, Integer expectedDimension,
String dimensionGetterName) {
super(IndexCreatorTest.class);
this.type = type;
this.parameter = parameter;
this.expectedClass = expectedClass;
this.expectedDefaultBinSize = expectedDefaultBinSize;
this.expectedBinSize = expectedBinSize;
this.expectedDimension = expectedDimension;
try {
// Conditional matches testGetIndexCreator's if-statement
this.dimensionGetter = this.expectedDimension == null ? null : expectedClass.getDeclaredMethod(dimensionGetterName);
} catch (NoSuchMethodException e) {
throw new RuntimeException(e);
}
}
}
@DataProvider(name = "indexCreator")
public Object[][] indexCreatorData() {
new IndexCreatorTest(GATKVCFIndexType.DYNAMIC_SEEK, 0, DynamicIndexCreator.class, -1, -1);
new IndexCreatorTest(GATKVCFIndexType.DYNAMIC_SIZE, 0, DynamicIndexCreator.class, -1, -1);
new IndexCreatorTest(GATKVCFIndexType.LINEAR, 100, LinearIndexCreator.class, LinearIndexCreator.DEFAULT_BIN_WIDTH, 100);
new IndexCreatorTest(GATKVCFIndexType.INTERVAL, 200, IntervalIndexCreator.class, IntervalIndexCreator.DEFAULT_FEATURE_COUNT, 200);
new IndexCreatorTest(GATKVCFIndexType.DYNAMIC_SEEK, 0, DynamicIndexCreator.class, null, null);
new IndexCreatorTest(GATKVCFIndexType.DYNAMIC_SIZE, 0, DynamicIndexCreator.class, null, null);
new IndexCreatorTest(GATKVCFIndexType.LINEAR, 100, LinearIndexCreator.class, 100, "getBinSize");
new IndexCreatorTest(GATKVCFIndexType.INTERVAL, 200, IntervalIndexCreator.class, 200, "getFeaturesPerInterval");
return IndexCreatorTest.getTests(IndexCreatorTest.class);
}
@Test(dataProvider = "indexCreator")
public void testGetIndexCreator(IndexCreatorTest spec) {
public void testGetIndexCreator(IndexCreatorTest spec) throws Exception{
File dummy = new File("");
IndexCreator ic = GATKVCFUtils.getIndexCreator(spec.type, spec.parameter, dummy);
Assert.assertEquals(ic.getClass(), spec.expectedClass, "Wrong IndexCreator type");
Assert.assertEquals(ic.defaultBinSize(), spec.expectedDefaultBinSize, "Wrong default bin size");
Assert.assertEquals(ic.getBinSize(), spec.expectedBinSize, "Wrong bin size");
if (spec.expectedDimension != null) {
Integer dimension = (int)spec.dimensionGetter.invoke(ic);
Assert.assertEquals(dimension, spec.expectedDimension, "Wrong dimension");
}
}
}

View File

@ -3,23 +3,23 @@
<modelVersion>4.0.0</modelVersion>
<groupId>net.sf</groupId>
<artifactId>picard</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
<name>picard</name>
<dependencies>
<dependency>
<groupId>net.sf</groupId>
<artifactId>sam</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
</dependency>
<dependency>
<groupId>org.broadinstitute</groupId>
<artifactId>variant</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
</dependency>
<dependency>
<groupId>org.broad</groupId>
<artifactId>tribble</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
</dependency>
<!-- TODO: Picard is using a custom zip with just ant's BZip2 classes. See also: http://www.kohsuke.org/bzip2 -->
<dependency>

View File

@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>net.sf</groupId>
<artifactId>sam</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
<name>sam-jdk</name>
<dependencies>
<dependency>

View File

@ -3,13 +3,13 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.broad</groupId>
<artifactId>tribble</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
<name>tribble</name>
<dependencies>
<dependency>
<groupId>net.sf</groupId>
<artifactId>sam</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
</dependency>
</dependencies>
</project>

View File

@ -3,18 +3,18 @@
<modelVersion>4.0.0</modelVersion>
<groupId>org.broadinstitute</groupId>
<artifactId>variant</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
<name>variant</name>
<dependencies>
<dependency>
<groupId>org.broad</groupId>
<artifactId>tribble</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
</dependency>
<dependency>
<groupId>net.sf</groupId>
<artifactId>sam</artifactId>
<version>1.107.1683</version>
<version>1.109.1722</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@ -43,7 +43,7 @@
<test.args>-Xmx${test.maxmemory} -XX:+UseParallelOldGC -XX:ParallelGCThreads=${java.gc.threads} -XX:GCTimeLimit=${java.gc.timeLimit} -XX:GCHeapFreeLimit=${java.gc.heapFreeLimit}</test.args>
<!-- Version numbers for picard sam-jdk. Usually kept in sync. -->
<picard.public.version>1.107.1683</picard.public.version>
<picard.public.version>1.109.1722</picard.public.version>
<sam.version>${picard.public.version}</sam.version>
<picard.version>${picard.public.version}</picard.version>
<variant.version>${picard.public.version}</variant.version>