Final support for variable-length lists of strings in BCF2
-- Updating many MD5s as well.
This commit is contained in:
parent
bd9d40fb84
commit
71da76039e
|
|
@ -188,8 +188,17 @@ public final class BCF2Decoder {
|
|||
final byte[] bytes = new byte[size]; // TODO -- in principle should just grab bytes from underlying array
|
||||
try {
|
||||
recordStream.read(bytes);
|
||||
final String s = new String(bytes);
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
|
||||
|
||||
int goodLength = 0;
|
||||
for ( ; goodLength < bytes.length ; goodLength++ )
|
||||
if ( bytes[goodLength] == 0 ) break;
|
||||
|
||||
if ( goodLength == 0 )
|
||||
return null;
|
||||
else {
|
||||
final String s = new String(bytes, 0, goodLength);
|
||||
return BCF2Utils.isCollapsedString(s) ? BCF2Utils.exploreStringList(s) : s;
|
||||
}
|
||||
} catch ( IOException e ) {
|
||||
throw new ReviewedStingException("readByte failure", e);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,10 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
|||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An efficient scheme for building and obtaining specialized
|
||||
|
|
@ -60,7 +57,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_KEY, new GTDecoder());
|
||||
// currently the generic decoder handles FILTER values properly, in so far as we don't tolerate multiple filter field values per genotype
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new GenericDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_FILTER_KEY, new FTDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.DEPTH_KEY, new DPDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.GENOTYPE_ALLELE_DEPTHS, new ADDecoder());
|
||||
genotypeFieldDecoder.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, new PLDecoder());
|
||||
|
|
@ -270,4 +267,16 @@ public class BCF2GenotypeFieldDecoders {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class FTDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final List<GenotypeBuilder> gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||
if ( value != null ) { // don't add missing values
|
||||
gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List<String>)value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ public enum BCF2Type {
|
|||
INT16(2, 2, 0xFFFF8000, -32767, 32767),
|
||||
INT32(3, 4, 0x80000000, -2147483647, 2147483647),
|
||||
FLOAT(5, 4, 0x7F800001),
|
||||
CHAR (7);
|
||||
CHAR (7, 1, 0x00000000);
|
||||
|
||||
private final int id;
|
||||
private final Object missingJavaValue;
|
||||
|
|
|
|||
|
|
@ -765,11 +765,11 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
// todo -- all of these on the fly parsing of the missing value should be static constants
|
||||
if (gtKey.equals(VCFConstants.GENOTYPE_KEY)) {
|
||||
genotypeAlleleLocation = i;
|
||||
} else if ( missing ) {
|
||||
// if it's truly missing (there is no provided value), skip adding it to the attributes
|
||||
} else if (gtKey.equals(VCFConstants.GENOTYPE_FILTER_KEY)) {
|
||||
final List<String> filters = parseFilters(getCachedString(GTValueArray[i]));
|
||||
if ( filters != null ) gb.filters(filters);
|
||||
} else if ( missing ) {
|
||||
// if it's truly missing (there is no provided value), skip adding it to the attributes
|
||||
} else if ( GTValueArray[i].equals(VCFConstants.MISSING_VALUE_v4) ) {
|
||||
// don't add missing values to the map
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"--beagleR2:BEAGLE " + beagleValidationDataLocation + "inttestbgl.r2 " +
|
||||
"--beagleProbs:BEAGLE " + beagleValidationDataLocation + "inttestbgl.gprobs " +
|
||||
"--beaglePhased:BEAGLE " + beagleValidationDataLocation + "inttestbgl.phased " +
|
||||
"-o %s --no_cmdline_in_header", 1, Arrays.asList("74db5bb05f08f4c1dd5a7cf844c903b6"));
|
||||
"-o %s --no_cmdline_in_header", 1, Arrays.asList("0f7ffd3c9c8010e765c26fce994be389"));
|
||||
executeTest("test BeagleOutputToVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -50,7 +50,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T ProduceBeagleInput -R " + hg19Reference + " " +
|
||||
"--variant:VCF3 " + beagleValidationDataLocation + "inttestbgl.input.vcf " +
|
||||
"-o %s", 1, Arrays.asList("689773807c87638de3a74564bd6cee2e"));
|
||||
"-o %s", 1, Arrays.asList("f301b089d21da259873f04bdc468835d"));
|
||||
executeTest("test BeagleInput", spec);
|
||||
}
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ public class BeagleIntegrationTest extends WalkerTest {
|
|||
"--beagleR2:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.r2 "+
|
||||
"--beagleProbs:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.gprobs.bgl "+
|
||||
"--beaglePhased:beagle /humgen/gsa-hpprojects/GATK/data/Validation_Data/EUR_beagle_in_test.phased.bgl "+
|
||||
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("22908352f0e476234706922d6bccdc91"));
|
||||
"-L 20:1-70000 -o %s --no_cmdline_in_header ",1,Arrays.asList("c92561016b7d8bd1d5c107bce8386b33"));
|
||||
|
||||
executeTest("testBeagleChangesSitesToRef",spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,9 +52,9 @@ public class DiffObjectsIntegrationTest extends WalkerTest {
|
|||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createData() {
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", true, "fc06e758e5588a52d2dddafdff1665a4");
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", true, "bf7ef17436a7eccf27be41a9477904f6");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", true, "3f46f5a964f7c34015d972256fe49a35");
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", false, "54dff80e3f9569146dd66d5369c82b48");
|
||||
new TestParams(testDir + "diffTestMaster.vcf", testDir + "diffTestTest.vcf", false, "8ab29169cff232e670db9a4c54fc4358");
|
||||
new TestParams(testDir + "exampleBAM.bam", testDir + "exampleBAM.simple.bam", false, "47bf16c27c9e2c657a7e1d13f20880c9");
|
||||
return TestParams.getTests(TestParams.class);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("d54a142d68dca54e478c13f9a0e4c95c","313cc749c7ee97713e4551de39e01ac5")
|
||||
Arrays.asList("cd112ec37a9e28d366aff29a85fdcaa0","313cc749c7ee97713e4551de39e01ac5")
|
||||
);
|
||||
executeTest("testTrueNegativeMV", spec);
|
||||
}
|
||||
|
|
@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("883ea7fd2b200c4b7fa95a4f7aa15931","dd90dad9fd11e1b16e6660c3ca0553e7")
|
||||
Arrays.asList("27ccd6feb51de7e7dcdf35f4697fa4eb","dd90dad9fd11e1b16e6660c3ca0553e7")
|
||||
);
|
||||
executeTest("testTruePositiveMV", spec);
|
||||
}
|
||||
|
|
@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("e812d62a3449b74b6948ee7deb8a0790","b35a86d2cad17f0db7b5e84ddc0e5545")
|
||||
Arrays.asList("719d681bb0a52a40bc854bba107c5c94","b35a86d2cad17f0db7b5e84ddc0e5545")
|
||||
);
|
||||
executeTest("testFalsePositiveMV", spec);
|
||||
}
|
||||
|
|
@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("e3c572f933a40e1878a2cfa52049517a","c53b5fd377bef48e9c6035a94db398db")
|
||||
Arrays.asList("7f4a277aee2c7398fcfa84d6c98d5fb3","c53b5fd377bef48e9c6035a94db398db")
|
||||
);
|
||||
executeTest("testSpecialCases", spec);
|
||||
}
|
||||
|
|
@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("b42af3b73a2cb38cfc92f8047dd686b3","6f596470740e1a57679bbb38c0126364")
|
||||
Arrays.asList("44e09d2f9e4d8a9488226d03a97fe999","6f596470740e1a57679bbb38c0126364")
|
||||
);
|
||||
executeTest("testPriorOption", spec);
|
||||
}
|
||||
|
|
@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
|
|||
"-fatherAlleleFirst"
|
||||
),
|
||||
2,
|
||||
Arrays.asList("c158a3816357597543ef85c4478c41e8","6d550784382aa910f78b533d889c91c0")
|
||||
Arrays.asList("60ced3d078792a150a03640b62926857","6d550784382aa910f78b533d889c91c0")
|
||||
);
|
||||
executeTest("testFatherAlleleFirst", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("d78f694499d917b13f0d3e797f04353a"));
|
||||
Arrays.asList("0a41b96b04a87fdb99bc3342d48d2eba"));
|
||||
executeTest("MAX 10 het sites [TEST ONE]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +36,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:1232503-1332503",
|
||||
1,
|
||||
Arrays.asList("9d9c3cb8b323c3d73af7fc96bc163619"));
|
||||
Arrays.asList("f7517896c899a872c24d8e823ac9deae"));
|
||||
executeTest("MAX 10 het sites [TEST TWO]; require PQ >= 10", spec);
|
||||
}
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 2, 30)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("321f815590992cb52da7a4989c3f2f4c"));
|
||||
Arrays.asList("cdbdd2f68c232012b6fe9a322b0ea24c"));
|
||||
executeTest("MAX 2 het sites [TEST THREE]; require PQ >= 30", spec);
|
||||
}
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 5, 100)
|
||||
+ " -L chr20:332341-382503",
|
||||
1,
|
||||
Arrays.asList("318f93ca4678a0b246a9f229252ff31d"));
|
||||
Arrays.asList("6b70e3e4e28f9583d35d98bf8a7d0d59"));
|
||||
executeTest("MAX 5 het sites [TEST FOUR]; require PQ >= 100", spec);
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +66,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 1000, 7, 10)
|
||||
+ " -L chr20:332341-482503",
|
||||
1,
|
||||
Arrays.asList("ed5552077aa123814022485ed555b6e0"));
|
||||
Arrays.asList("6163a1fba27532da77765a7a11c55332"));
|
||||
executeTest("MAX 7 het sites [TEST FIVE]; require PQ >= 10; cacheWindow = 1000", spec);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +76,7 @@ public class ReadBackedPhasingIntegrationTest extends WalkerTest {
|
|||
baseTestString(hg18Reference, "phasing_test_chr20_332341_1332503.bam", "phasing_test_chr20_332341_1332503.vcf", 20000, 10, 10)
|
||||
+ " -L chr20:652810-681757",
|
||||
1,
|
||||
Arrays.asList("5223d1395d373d2a968d6dd22741ad6c"));
|
||||
Arrays.asList("61a7d05f9eb4317cf0e6937d72e1e7ec"));
|
||||
executeTest("MAX 10 het sites [TEST SIX]; require PQ >= 10; cacheWindow = 20000; has inconsistent sites", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ public class ValidationSiteSelectorIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(sampleGL + freqAF + "--variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("3bf094e1aef563daf7c936032259d490")
|
||||
Arrays.asList("0ee4a565a0d4f6b6942abd72a373becd")
|
||||
);
|
||||
|
||||
executeTest("testPolyGLFreqAF--" + testfile, spec);
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
VRTest lowPass = new VRTest("phase1.projectConsensus.chr20.raw.snps.vcf",
|
||||
"0ddd1e0e483d2eaf56004615cea23ec7", // tranches
|
||||
"6e1f98bb819ccf03e17a2288742160d3", // recal file
|
||||
"1050c387d170639f8cec221e5dddd626"); // cut VCF
|
||||
"c58ff4140e8914f0b656ed625c7f73b9"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRTest")
|
||||
public Object[][] createData1() {
|
||||
|
|
@ -76,7 +76,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
|
|||
VRTest indel = new VRTest("combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"da4458d05f6396f5c4ab96f274e5ccdc", // tranches
|
||||
"8e2417336fa62e6c4d9f61b6deebdd82", // recal file
|
||||
"bf0e8ed5e250d52f0545074c61217d16"); // cut VCF
|
||||
"05e88052e0798f1c1e83f0a8938bce56"); // cut VCF
|
||||
|
||||
@DataProvider(name = "VRIndelTest")
|
||||
public Object[][] createData2() {
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest {
|
|||
" --no_cmdline_in_header " +
|
||||
" -o %s",
|
||||
1,
|
||||
Arrays.asList("c5e93b0e2e8610785d43e5d9e7fb5a7b")
|
||||
Arrays.asList("b532a20b5af4e8ea7a073888976c71ba")
|
||||
);
|
||||
|
||||
executeTest("testSimpleVCFStreaming", spec);
|
||||
|
|
|
|||
Loading…
Reference in New Issue