GATK now uses an optimized indexing scheme in Tribble. 5x or more performance gain on files with many genotypes. Updated integration test that was failing and was clearly wrong. DB=; isn't a valid annotation.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3596 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-06-19 21:36:41 +00:00
parent 8ff93f77e6
commit 57a13805da
8 changed files with 27 additions and 4 deletions

View File

@ -31,6 +31,7 @@ import java.io.IOException;
import java.util.ArrayList;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.util.AsciiLineReader;
@ -74,7 +75,11 @@ public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTabl
return null; // TODO: do we want the header to be a concrete type?
}
// todo -- probably worth implementing for performance reasons
/**
 * Entry point for callers that ask this codec for a feature via {@code decodeLoc}.
 * No separate location-only parsing exists here: the line goes straight to
 * {@code decode(line)} and that result is returned as-is.
 *
 * @param line the text line to decode
 * @return whatever {@code decode(line)} produces for this line
 */
public Feature decodeLoc(String line) {
    final Feature decoded = decode(line);
    return decoded;
}
/**
* Parses the line into an AnnotatorInputTableFeature object.
*

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.refdata.features.beagle;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature;
/*
@ -57,6 +58,10 @@ public class BeagleCodec implements FeatureCodec<BeagleFeature> {
private static final String delimiterRegex = "\\s+";
/**
 * Decodes a line on behalf of {@code decodeLoc} callers by running the regular
 * {@code decode(line)} and returning its result unchanged.
 *
 * @param line the text line to decode
 * @return the value returned by {@code decode(line)}
 */
public Feature decodeLoc(String line) {
    final Feature decoded = decode(line);
    return decoded;
}
public static String[] readHeader(final File source) throws IOException {
FileInputStream is = new FileInputStream(source);
try {

View File

@ -76,6 +76,10 @@ public class SAMPileupCodec implements FeatureCodec<SAMPileupFeature> {
return null; // we don't have a header
}
/**
 * Decodes a line on behalf of {@code decodeLoc} callers. This simply runs the full
 * {@code decode(line)} and returns the resulting feature.
 *
 * @param line the pileup text line to decode
 * @return the value returned by {@code decode(line)}
 */
public Feature decodeLoc(String line) {
    final Feature decoded = decode(line);
    return decoded;
}
public SAMPileupFeature decode(String line) {
// 0 1 2 3 4 5 6 7
//* chrX 466 T Y 170 170 88 32 ... (piles of read bases and quals follow)

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.refdata.features.samread;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.util.ParsingUtils;
@ -66,6 +67,10 @@ public class SAMReadCodec implements FeatureCodec<SAMReadFeature> {
return null; // we haven't stored the header
}
/**
 * Decodes a line on behalf of {@code decodeLoc} callers. The work is delegated entirely
 * to {@code decode(line)}, whose result is returned without modification.
 *
 * @param line the text line to decode
 * @return the value returned by {@code decode(line)}
 */
public Feature decodeLoc(String line) {
    final Feature decoded = decode(line);
    return decoded;
}
/**
* Decode a single line in a SAM text file.
* @param line line to decode.

View File

@ -135,6 +135,10 @@ public class VCF4Codec implements FeatureCodec {
// a key optimization -- we need a per thread string parts array, so we don't allocate a big array over and over
private String[] parts = null;
/**
 * Decodes a line on behalf of {@code decodeLoc} callers. There is no lighter-weight path
 * here: the line is passed to {@code decode(line)} and that feature is returned.
 *
 * @param line the text line to decode
 * @return the value returned by {@code decode(line)}
 */
public Feature decodeLoc(String line) {
    final Feature decoded = decode(line);
    return decoded;
}
public Feature decode(String line) {
if ( parts == null )
parts = REQUIRE_HEADER ? new String[header.getColumnCount()] : new String[10000]; // todo -- remove require header

View File

@ -122,7 +122,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
/**
 * Builds a WalkerTestSpec from the base test string plus the arguments
 * {@code -D <GATKDataLocation>dbsnp_129_b36.rod -G "Standard"
 * -B variant,VCF,<validationDataLocation>vcfexample3empty.vcf -BTI variant}
 * and executes it under the label "getting DB tag".
 */
public void testDBTag() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -D " + GATKDataLocation + "dbsnp_129_b36.rod -G \"Standard\" -B variant,VCF," + validationDataLocation + "vcfexample3empty.vcf -BTI variant", 1,
// NOTE(review): the two Arrays.asList(...) lines below look like the removed and added
// sides of this diff hunk (old vs. new expected checksum, presumably MD5) — only one of
// them can be present in the compiled source; confirm against the repository.
Arrays.asList("05a9dcb87efc65e5c726c201460192d9"));
Arrays.asList("c3e0361af8e98bda1bc3a27260cb2c4a"));
executeTest("getting DB tag", spec);
}
}
}

View File

@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="org.broad" module="tribble" revision="104" status="integration" publication="201006171154200" />
<info organisation="org.broad" module="tribble" revision="105" status="integration" publication="201006181154200" />
</ivy-module>