Merge pull request #1016 from broadinstitute/rhl_allele_rep_span_dels
Add spanning deletions allele
This commit is contained in:
commit
9522be8762
|
|
@ -165,7 +165,6 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
|||
// take care of the VCF headers
|
||||
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit());
|
||||
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
|
||||
headerLines.add(new VCFSimpleHeaderLine(GATKVCFConstants.SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG, GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_NAME, "Represents any possible spanning deletion allele at this location"));
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags
|
||||
|
||||
final Set<String> samples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
|
||||
|
|
|
|||
|
|
@ -219,7 +219,6 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
headerLines.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
|
||||
|
||||
// add headers for annotations added by this tool
|
||||
headerLines.add(new VCFSimpleHeaderLine(GATKVCFConstants.SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG, GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_NAME, "Represents any possible spanning deletion allele at this location"));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
|
||||
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
|
||||
|
|
@ -302,7 +301,13 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
|
|||
* @return true if it has proper alternate alleles, false otherwise
|
||||
*/
|
||||
private boolean isProperlyPolymorphic(final VariantContext vc) {
|
||||
return ( vc != null && !vc.isSymbolic() );
|
||||
return ( vc != null &&
|
||||
!vc.getAlternateAlleles().isEmpty() &&
|
||||
(!vc.isBiallelic() ||
|
||||
(!vc.getAlternateAllele(0).equals(Allele.SPAN_DEL) &&
|
||||
!vc.getAlternateAllele(0).equals(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED))
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -126,14 +126,15 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
final boolean isSpanningEvent = loc.getStart() != vc.getStart();
|
||||
// record whether it's also a spanning deletion/event (we know this because the VariantContext type is no
|
||||
// longer "symbolic" but "mixed" because there are real alleles mixed in with the symbolic non-ref allele)
|
||||
sawSpanningDeletion |= ( isSpanningEvent && vc.isMixed() ) || vc.getAlternateAlleles().contains(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE);
|
||||
sawSpanningDeletion |= ( isSpanningEvent && vc.isMixed() ) || vc.getAlternateAlleles().contains(Allele.SPAN_DEL) ||
|
||||
vc.getAlternateAlleles().contains(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED );
|
||||
sawNonSpanningEvent |= ( !isSpanningEvent && vc.isMixed() );
|
||||
|
||||
vcAndNewAllelePairs.add(new Pair<>(vc, isSpanningEvent ? replaceWithNoCallsAndDels(vc) : remapAlleles(vc, refAllele, finalAlleleSet)));
|
||||
}
|
||||
|
||||
// Add <DEL> and <NON_REF> to the end if at all required in the output.
|
||||
if ( sawSpanningDeletion && (sawNonSpanningEvent || !removeNonRefSymbolicAllele) ) finalAlleleSet.add(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE);
|
||||
if ( sawSpanningDeletion && (sawNonSpanningEvent || !removeNonRefSymbolicAllele) ) finalAlleleSet.add(Allele.SPAN_DEL);
|
||||
if (!removeNonRefSymbolicAllele) finalAlleleSet.add(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
|
||||
|
||||
final List<Allele> allelesList = new ArrayList<>(finalAlleleSet);
|
||||
|
|
@ -328,7 +329,7 @@ public class ReferenceConfidenceVariantContextMerger {
|
|||
if ( allele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) )
|
||||
replacement = allele;
|
||||
else if ( allele.length() < vc.getReference().length() )
|
||||
replacement = GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE;
|
||||
replacement = Allele.SPAN_DEL;
|
||||
else
|
||||
replacement = Allele.NO_CALL;
|
||||
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
eventLength = 5;
|
||||
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||
|
||||
Assert.assertEquals(alleles.size(),0);
|
||||
Assert.assertEquals(alleles.size(),2);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
|
||||
1,
|
||||
Arrays.asList("ebe26077809961f53d5244643d24fd45"));
|
||||
Arrays.asList("7b3153135e4f8e1d137d3f4beb46f182"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -112,7 +112,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
|
||||
1,
|
||||
Arrays.asList("2d36a5f996cad47e5d05fcd78f6e572e"));
|
||||
Arrays.asList("4f546634213ece6f08ec9258620b92bb"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -190,7 +190,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testMD5s() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("83ea9f4a9aadb1218c21c9d3780e8009"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("b7c753452ab0c05f9cee538e420b87fa"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMD5s", spec);
|
||||
}
|
||||
|
|
@ -198,7 +198,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testBasepairResolutionOutput() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("f153cb6e986efc9b50f0b8833fe5d3da"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("bb6420ead95da4c72e76ca4bf5860ef0"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBasepairResolutionOutput", spec);
|
||||
}
|
||||
|
|
@ -206,7 +206,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testBreakBlocks() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791 --breakBandsAtMultiplesOf 5");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("6626ff272e7e76fba091f5bde4a1f963"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("dd31182124c4b78a8a03edb1e0cf618b"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBreakBlocks", spec);
|
||||
}
|
||||
|
|
@ -217,7 +217,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "spanningDel.1.g.vcf -V " + privateTestDir + "spanningDel.2.g.vcf",
|
||||
1,
|
||||
Arrays.asList("fba48ce2bf8761366ff2cd0b45d0421f"));
|
||||
Arrays.asList("58984edf9a3a92c9fc97039b97755861"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testSpanningDeletions", spec);
|
||||
}
|
||||
|
|
@ -226,7 +226,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
public void testWrongReferenceBaseBugFix() throws Exception {
|
||||
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf"
|
||||
+ " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input2.vcf") + " -o %s --no_cmdline_in_header");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("331c1a4a6a72ea1617c1697a5d945d56"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c0fdba537399cf28b28771963e2c5174"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testWrongReferenceBaseBugFix",spec);
|
||||
|
||||
|
|
@ -235,7 +235,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testBasepairResolutionInput() throws Exception {
|
||||
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("207e89b5677fbf0ef4d1ff768262cf0c"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("6aeb88ca94cb5223f26175da72b985f2"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBasepairResolutionInput", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("4dfea9a9b1a77c4c6b9edc61f9ea8da2"));
|
||||
Arrays.asList("23ff3e22262929138ca1f00fc111cadf"));
|
||||
executeTest("testUpdatePGT", spec);
|
||||
}
|
||||
|
||||
|
|
@ -91,7 +91,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf -A StrandAlleleCountsBySample", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("a96b79e7c3689c8d5506083cb6d27390"));
|
||||
Arrays.asList("88fa4a021e4aac9a0e48bd54b2949ece"));
|
||||
executeTest("testUpdatePGT, adding StrandAlleleCountsBySample annotation", spec);
|
||||
}
|
||||
|
||||
|
|
@ -103,7 +103,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("bf3c1982ab6ffee410cb6a1fff6e7105"));
|
||||
Arrays.asList("06b4e2589c5b903f7c51ae9968bebe77"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -115,7 +115,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("47d454936dc1f17cf4c4f84f02841346"));
|
||||
Arrays.asList("599394c205c1d6641b9bebabbd29e13c"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -127,7 +127,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
|
||||
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("5d79ea9de8ada8520d01284cf0c9f720"));
|
||||
Arrays.asList("f7d5344a85e6d7fc2437d4253b424cb0"));
|
||||
executeTest("combineSingleSamplePipelineGVCF", spec);
|
||||
}
|
||||
|
||||
|
|
@ -139,7 +139,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("d69b43cac448f45218e77308fc01e9e6"));
|
||||
Arrays.asList("c9e4d1e52ee1f3a5233f1fb100f24d5e"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
|
||||
}
|
||||
|
||||
|
|
@ -152,7 +152,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-20,000,000", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("7c93d82758bfb6e7efec257ef8a46217"));
|
||||
Arrays.asList("aa19980b9a525afed43e98c821114ae5"));
|
||||
executeTest("combineSingleSamplePipelineGVCFHierarchical", spec);
|
||||
}
|
||||
|
||||
|
|
@ -164,7 +164,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
|
||||
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
|
||||
1,
|
||||
Arrays.asList("5b60a7a9575ea83407aa61123960a0cc"));
|
||||
Arrays.asList("f23c9d62542a69b5cbf0e9f89fdd235d"));
|
||||
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
|
||||
}
|
||||
|
||||
|
|
@ -174,7 +174,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "gvcfExample1.vcf",
|
||||
1,
|
||||
Arrays.asList("9e59b94c84dd673b8db9d35cae7e0f68"));
|
||||
Arrays.asList("d602d9e5d336798e4ccb52d2b5f91677"));
|
||||
executeTest("testJustOneSample", spec);
|
||||
}
|
||||
|
||||
|
|
@ -185,14 +185,14 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V " + privateTestDir + "gvcfExample1.vcf" +
|
||||
" -V " + privateTestDir + "gvcfExample2.vcf",
|
||||
1,
|
||||
Arrays.asList("8407cb9a1ab34e705e5a54a0d4146d84"));
|
||||
Arrays.asList("6c6d6ef90386eb6c6ed649379aac0c13"));
|
||||
executeTest("testSamplesWithDifferentLs", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testNoPLsException() {
|
||||
// Test with input files with (1) 0/0 and (2) ./.
|
||||
final String md5 = "3e69805dc1c0ada0a050a65b89ecab30";
|
||||
final String md5 = "d04b32cf2fa97d303ff7fdc779a653d4";
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
|
||||
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf",
|
||||
|
|
@ -212,7 +212,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-nda"),
|
||||
1,
|
||||
Arrays.asList("5a036de16b7a87626d2b76727376d9df"));
|
||||
Arrays.asList("7132a43d93a9855d03b27b4b0381194c"));
|
||||
executeTest("testNDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -221,7 +221,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-maxAltAlleles 1"),
|
||||
1,
|
||||
Arrays.asList("2f3e6879fa27128a8be7b067ded78966"));
|
||||
Arrays.asList("07844593a4e1ff1110ef8c1de42cc290"));
|
||||
executeTest("testMaxAltAlleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -230,7 +230,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseBPResolutionString("-stand_call_conf 300 -stand_emit_conf 100"),
|
||||
1,
|
||||
Arrays.asList("2e4a1ad71e8fc127b594077166c0344b"));
|
||||
Arrays.asList("56caad762b26479ba5e2cc99222b9030"));
|
||||
executeTest("testStandardConf", spec);
|
||||
}
|
||||
|
||||
|
|
@ -274,7 +274,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
" -V:combined2 " + privateTestDir + "combine.single.sample.pipeline.combined.vcf" +
|
||||
" --uniquifySamples", b37KGReference),
|
||||
1,
|
||||
Arrays.asList("9a472c4e101fff4892efb9255c5cd8b3"));
|
||||
Arrays.asList("ba36b36145e038e3cb004adf11bce96c"));
|
||||
executeTest("testUniquifiedSamples", spec);
|
||||
|
||||
}
|
||||
|
|
@ -446,7 +446,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
}
|
||||
|
||||
|
||||
private static final String simpleSpanningDeletionsMD5 = "e8616a396d40b4918ad30189856ceb01";
|
||||
private static final String simpleSpanningDeletionsMD5 = "1cf4ea1da40306741ec4b9a5fe1568b9";
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testSpanningDeletionsMD5() {
|
||||
|
|
@ -476,7 +476,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "spanningDel.1.g.vcf -V " + privateTestDir + "spanningDel.2.g.vcf -V " + privateTestDir + "spanningDel.3.g.vcf",
|
||||
1,
|
||||
Arrays.asList("1c418229117bc8f148a69eda9c496309"));
|
||||
Arrays.asList("0aa7ceae6af1dc4fda6732e978ace864"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testMultipleSpanningDeletionsMD5", spec);
|
||||
}
|
||||
|
|
@ -487,6 +487,17 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
|
|||
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "spanningDel.delOnly.g.vcf",
|
||||
1,
|
||||
Arrays.asList("02cca337e097b86c5471929036ad4b64"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles", spec);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testDeprecatedSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
|
||||
" -V " + privateTestDir + "spanningDel.depr.delOnly.g.vcf",
|
||||
1,
|
||||
Arrays.asList("46169d08f93e5ff57856c7b64717314b"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles", spec);
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ public class VariantContextMergerUnitTest extends BaseTest {
|
|||
ATCATCT = Allele.create("ATCATCT");
|
||||
ATref = Allele.create("AT",true);
|
||||
Anoref = Allele.create("A",false);
|
||||
del = GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE;
|
||||
del = Allele.SPAN_DEL;
|
||||
GT = Allele.create("GT",false);
|
||||
genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(hg18Reference)));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -67,23 +67,6 @@ import java.util.ArrayList;
|
|||
*/
|
||||
public class VariantsToVCFIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void testVariantsToVCFUsingDbsnpInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
md5.add("72e6ce7aff7dec7ca9e7580be7ddd435");
|
||||
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-R " + b36KGReference +
|
||||
" --variant:OldDbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" +
|
||||
" -T VariantsToVCF" +
|
||||
" -L 1:1-30,000,000" +
|
||||
" -o %s" +
|
||||
" --no_cmdline_in_header",
|
||||
1, // just one output file
|
||||
md5);
|
||||
executeTest("testVariantsToVCFUsingDbsnpInput", spec).getFirst();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testVariantsToVCFUsingGeliInput() {
|
||||
List<String> md5 = new ArrayList<String>();
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ public class CramIntegrationTest extends WalkerTest {
|
|||
@DataProvider(name="cramData")
|
||||
public Object[][] getCRAMData() {
|
||||
return new Object[][] {
|
||||
{"PrintReads", "exampleBAM.bam", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"},
|
||||
{"PrintReads", "exampleBAM.bam", "", "cram", "fc6e3919a8a34266c89ef66e97ceaba9"},
|
||||
//{"PrintReads", "exampleCRAM.cram", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"}, https://github.com/samtools/htsjdk/issues/148
|
||||
{"PrintReads", "exampleCRAM.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},
|
||||
{"PrintReads", "exampleCRAM-noindex.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},
|
||||
|
|
|
|||
|
|
@ -44,8 +44,8 @@
|
|||
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
|
||||
|
||||
<!-- Version numbers for picard and htsjdk -->
|
||||
<htsjdk.version>1.132</htsjdk.version>
|
||||
<picard.version>1.131</picard.version>
|
||||
<htsjdk.version>1.134</htsjdk.version>
|
||||
<picard.version>1.133</picard.version>
|
||||
</properties>
|
||||
|
||||
<!-- Dependency configuration (versions, etc.) -->
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.refdata;
|
|||
import htsjdk.samtools.util.SequenceUtil;
|
||||
import htsjdk.tribble.Feature;
|
||||
import htsjdk.tribble.annotation.Strand;
|
||||
import htsjdk.tribble.dbsnp.OldDbSNPFeature;
|
||||
import htsjdk.tribble.gelitext.GeliTextFeature;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
|
|
@ -111,139 +110,6 @@ public class VariantContextAdaptors {
|
|||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// dbSNP to VariantContext
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private static class DBSnpAdaptor implements VCAdaptor {
|
||||
private static boolean isSNP(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
|
||||
}
|
||||
|
||||
private static boolean isMNP(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
|
||||
}
|
||||
|
||||
private static boolean isInsertion(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("insertion");
|
||||
}
|
||||
|
||||
private static boolean isDeletion(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("deletion");
|
||||
}
|
||||
|
||||
private static boolean isIndel(OldDbSNPFeature feature) {
|
||||
return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature);
|
||||
}
|
||||
|
||||
public static boolean isComplexIndel(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("in-del");
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlternateAlleleList(OldDbSNPFeature feature) {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (String allele : getAlleleList(feature))
|
||||
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlleleList(OldDbSNPFeature feature) {
|
||||
List<String> alleleList = new ArrayList<String>();
|
||||
// add ref first
|
||||
if ( feature.getStrand() == Strand.POSITIVE )
|
||||
alleleList = Arrays.asList(feature.getObserved());
|
||||
else
|
||||
for (String str : feature.getObserved())
|
||||
alleleList.add(SequenceUtil.reverseComplement(str));
|
||||
if ( alleleList.size() > 0 && alleleList.contains(feature.getNCBIRefBase())
|
||||
&& !alleleList.get(0).equals(feature.getNCBIRefBase()) )
|
||||
Collections.swap(alleleList, alleleList.indexOf(feature.getNCBIRefBase()), 0);
|
||||
|
||||
return alleleList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts non-VCF formatted dbSNP records to VariantContext.
|
||||
* @return OldDbSNPFeature.
|
||||
*/
|
||||
@Override
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return OldDbSNPFeature.class; }
|
||||
|
||||
@Override
|
||||
public VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
|
||||
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
|
||||
final byte refBaseForIndel = ref.getBases()[index];
|
||||
final boolean refBaseIsDash = dbsnp.getNCBIRefBase().equals("-");
|
||||
|
||||
boolean addPaddingBase;
|
||||
if ( isSNP(dbsnp) || isMNP(dbsnp) )
|
||||
addPaddingBase = false;
|
||||
else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
|
||||
addPaddingBase = refBaseIsDash || GATKVariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
|
||||
else
|
||||
return null; // can't handle anything else
|
||||
|
||||
Allele refAllele;
|
||||
if ( refBaseIsDash )
|
||||
refAllele = Allele.create(refBaseForIndel, true);
|
||||
else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
||||
return null;
|
||||
else
|
||||
refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);
|
||||
|
||||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
||||
// add all of the alt alleles
|
||||
for ( String alt : getAlternateAlleleList(dbsnp) ) {
|
||||
if ( Allele.wouldBeNullAllele(alt.getBytes()))
|
||||
alt = "";
|
||||
else if ( ! Allele.acceptableAlleleBases(alt) )
|
||||
return null;
|
||||
|
||||
alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
|
||||
}
|
||||
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
builder.source(name).id(dbsnp.getRsID());
|
||||
builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
|
||||
builder.alleles(alleles);
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
private static List<String> stripNullDashes(final List<String> alleles) {
|
||||
final List<String> newAlleles = new ArrayList<String>(alleles.size());
|
||||
for ( final String allele : alleles ) {
|
||||
if ( allele.equals("-") )
|
||||
newAlleles.add("");
|
||||
else
|
||||
newAlleles.add(allele);
|
||||
}
|
||||
return newAlleles;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
|
|||
|
|
@ -144,6 +144,6 @@ public final class GATKVCFConstants {
|
|||
public final static String SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG = "ALT";
|
||||
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
|
||||
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
|
||||
public final static String SPANNING_DELETION_SYMBOLIC_ALLELE_NAME = "*:DEL";
|
||||
public final static Allele SPANNING_DELETION_SYMBOLIC_ALLELE = Allele.create("<"+SPANNING_DELETION_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible spanning deletion allele at this site
|
||||
public final static String SPANNING_DELETION_SYMBOLIC_ALLELE_NAME_DEPRECATED = "*:DEL";
|
||||
public final static Allele SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED = Allele.create("<"+SPANNING_DELETION_SYMBOLIC_ALLELE_NAME_DEPRECATED+">", false); // represents any possible spanning deletion allele at this site
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue