Final support for variable-length lists of strings in BCF2

-- Updating many MD5s as well.
This commit is contained in:
Mark DePristo 2012-06-13 17:50:21 -04:00
parent bd9d40fb84
commit 71da76039e
11 changed files with 49 additions and 31 deletions

View File

@ -188,8 +188,17 @@ public final class BCF2Decoder {
final byte[] bytes = new byte[size]; // TODO -- in principle should just grab bytes from underlying array
try {
recordStream.read(bytes);
final String s = new String(bytes);
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
int goodLength = 0;
for ( ; goodLength < bytes.length ; goodLength++ )
if ( bytes[goodLength] == 0 ) break;
if ( goodLength == 0 )
return null;
else {
final String s = new String(bytes, 0, goodLength);
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
}
} catch ( IOException e ) {
throw new ReviewedStingException("readByte failure", e);
}

View File

@ -32,10 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.*;
/**
* An efficient scheme for building and obtaining specialized
@ -60,7 +57,7 @@ public class BCF2GenotypeFieldDecoders {
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_KEY, new GTDecoder());
// currently the generic decoder handles FILTER values properly, in so far as we don't tolerate multiple filter field values per genotype
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new GenericDecoder());
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new FTDecoder());
genotypeFieldDecoder.put(VCFConstants.DEPTH_KEY, new DPDecoder());
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new ADDecoder());
genotypeFieldDecoder.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new PLDecoder());
@ -270,4 +267,16 @@ public class BCF2GenotypeFieldDecoders {
}
}
}
/**
 * Decoder for the per-genotype filter field.
 *
 * For each genotype builder, one typed value is read from the decoder.  A
 * null value is treated as missing and leaves the builder untouched.  A
 * single String is wrapped in a one-element list; any other non-null value
 * is cast to a list of strings and handed to the builder as its filters.
 */
private class FTDecoder implements Decoder {
    @Override
    public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
        for ( final GenotypeBuilder builder : gbs ) {
            // exactly one typed value is consumed per genotype, in builder order
            final Object decoded = decoder.decodeTypedValue(typeDescriptor);
            if ( decoded == null )
                continue; // don't add missing values

            final List<String> filters;
            if ( decoded instanceof String )
                filters = Collections.singletonList((String)decoded);
            else
                filters = (List<String>)decoded; // NOTE(review): presumably a list of filter names -- confirm against decodeTypedValue
            builder.filters(filters);
        }
    }
}
}

View File

@ -39,7 +39,7 @@ public enum BCF2Type {
INT16(2, 2, 0xFFFF8000, -32767, 32767),
INT32(3, 4, 0x80000000, -2147483647, 2147483647),
FLOAT(5, 4, 0x7F800001),
CHAR (7);
CHAR (7, 1, 0x00000000);
private final int id;
private final Object missingJavaValue;

View File

@ -765,11 +765,11 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
// todo -- all of these on the fly parsing of the missing value should be static constants
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
genotypeAlleleLocation = i;
} else if ( missing ) {
// if it's truly missing (there is no provided value), skip adding it to the attributes
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
final List<String> filters = parseFilters(getCachedString(GTValueArray[i]));
if ( filters != null ) gb.filters(filters);
} else if ( missing ) {
// if it's truly missing (there is no provided value), skip adding it to the attributes
} else if ( GTValueArray[i].equals(VCFConstants.MISSING_VALUE_v4) ) {
// don't add missing values to the map
} else {

View File

@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
"--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
"--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
"-o %s --no_cmdline_in_header", 1, Arrays.asList("74db5bb05f08f4c1dd5a7cf844c903b6"));
"-o %s --no_cmdline_in_header", 1, Arrays.asList("0f7ffd3c9c8010e765c26fce994be389"));
executeTest("test BeagleOutputToVCF", spec);
}
@ -50,7 +50,7 @@ public class BeagleIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T ProduceBeagleInput -R " + hg19Reference + " " +
"--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
"-o %s", 1, Arrays.asList("689773807c87638de3a74564bd6cee2e"));
"-o %s", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
executeTest("test BeagleInput", spec);
}
@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest {
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("22908352f0e476234706922d6bccdc91"));
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("c92561016b7d8bd1d5c107bce8386b33"));
executeTest("testBeagleChangesSitesToRef",spec);
}

View File

@ -52,9 +52,9 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
@DataProvider(name = "data")
public Object[][] createData() {
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", true, "fc06e758e5588a52d2dddafdff1665a4");
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", true, "bf7ef17436a7eccf27be41a9477904f6");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", true, "3f46f5a964f7c34015d972256fe49a35");
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", false, "54dff80e3f9569146dd66d5369c82b48");
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", false, "8ab29169cff232e670db9a4c54fc4358");
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", false, "47bf16c27c9e2c657a7e1d13f20880c9");
return TestParams.getTests(TestParams.class);
}

View File

@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("d54a142d68dca54e478c13f9a0e4c95c","313cc749c7ee97713e4551de39e01ac5")
Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","313cc749c7ee97713e4551de39e01ac5")
);
executeTest("testTrueNegativeMV", spec);
}
@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("883ea7fd2b200c4b7fa95a4f7aa15931","dd90dad9fd11e1b16e6660c3ca0553e7")
Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","dd90dad9fd11e1b16e6660c3ca0553e7")
);
executeTest("testTruePositiveMV", spec);
}
@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("e812d62a3449b74b6948ee7deb8a0790","b35a86d2cad17f0db7b5e84ddc0e5545")
Arrays.asList("719d681bb0a52a40bc854bba107c5c94","b35a86d2cad17f0db7b5e84ddc0e5545")
);
executeTest("testFalsePositiveMV", spec);
}
@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("e3c572f933a40e1878a2cfa52049517a","c53b5fd377bef48e9c6035a94db398db")
Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","c53b5fd377bef48e9c6035a94db398db")
);
executeTest("testSpecialCases", spec);
}
@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-o %s"
),
2,
Arrays.asList("b42af3b73a2cb38cfc92f8047dd686b3","6f596470740e1a57679bbb38c0126364")
Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","6f596470740e1a57679bbb38c0126364")
);
executeTest("testPriorOption", spec);
}
@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
"-fatherAlleleFirst"
),
2,
Arrays.asList("c158a3816357597543ef85c4478c41e8","6d550784382aa910f78b533d889c91c0")
Arrays.asList("60ced3d078792a150a03640b62926857","6d550784382aa910f78b533d889c91c0")
);
executeTest("testFatherAlleleFirst", spec);
}

View File

@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:332341-382503",
1,
Arrays.asList("d78f694499d917b13f0d3e797f04353a"));
Arrays.asList("0a41b96b04a87fdb99bc3342d48d2eba"));
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
}
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:1232503-1332503",
1,
Arrays.asList("9d9c3cb8b323c3d73af7fc96bc163619"));
Arrays.asList("f7517896c899a872c24d8e823ac9deae"));
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
}
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
+ " -L chr20:332341-382503",
1,
Arrays.asList("321f815590992cb52da7a4989c3f2f4c"));
Arrays.asList("cdbdd2f68c232012b6fe9a322b0ea24c"));
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
}
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
+ " -L chr20:332341-382503",
1,
Arrays.asList("318f93ca4678a0b246a9f229252ff31d"));
Arrays.asList("6b70e3e4e28f9583d35d98bf8a7d0d59"));
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
}
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
+ " -L chr20:332341-482503",
1,
Arrays.asList("ed5552077aa123814022485ed555b6e0"));
Arrays.asList("6163a1fba27532da77765a7a11c55332"));
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
}
@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
+ " -L chr20:652810-681757",
1,
Arrays.asList("5223d1395d373d2a968d6dd22741ad6c"));
Arrays.asList("61a7d05f9eb4317cf0e6937d72e1e7ec"));
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
}

View File

@ -81,7 +81,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(sampleGL + freqAF + "--variant " + testfile),
1,
Arrays.asList("3bf094e1aef563daf7c936032259d490")
Arrays.asList("0ee4a565a0d4f6b6942abd72a373becd")
);
executeTest("testPolyGLFreqAF--" + testfile, spec);

View File

@ -28,7 +28,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
"6e1f98bb819ccf03e17a2288742160d3", // recal file
"1050c387d170639f8cec221e5dddd626"); // cut VCF
"c58ff4140e8914f0b656ed625c7f73b9"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {
@ -76,7 +76,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
"8e2417336fa62e6c4d9f61b6deebdd82", // recal file
"bf0e8ed5e250d52f0545074c61217d16"); // cut VCF
"05e88052e0798f1c1e83f0a8938bce56"); // cut VCF
@DataProvider(name = "VRIndelTest")
public Object[][] createData2() {

View File

@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
" --no_cmdline_in_header " +
" -o %s",
1,
Arrays.asList("c5e93b0e2e8610785d43e5d9e7fb5a7b")
Arrays.asList("b532a20b5af4e8ea7a073888976c71ba")
);
executeTest("testSimpleVCFStreaming", spec);