Add spanning deletions allele

This commit is contained in:
Ron Levine 2015-06-01 11:25:10 -04:00
parent 456fefa860
commit dbed660183
12 changed files with 54 additions and 189 deletions

View File

@ -165,7 +165,6 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
// take care of the VCF headers
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit());
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
headerLines.add(new VCFSimpleHeaderLine(GATKVCFConstants.SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG, GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_NAME, "Represents any possible spanning deletion allele at this location"));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); // needed for gVCFs without DP tags
final Set<String> samples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);

View File

@ -219,7 +219,6 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
headerLines.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
// add headers for annotations added by this tool
headerLines.add(new VCFSimpleHeaderLine(GATKVCFConstants.SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG, GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_NAME, "Represents any possible spanning deletion allele at this location"));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_COUNT_KEY));
headerLines.add(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.MLE_ALLELE_FREQUENCY_KEY));
headerLines.add(GATKVCFHeaderLines.getFormatLine(GATKVCFConstants.REFERENCE_GENOTYPE_QUALITY));
@ -302,7 +301,13 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
* @return true if it has proper alternate alleles, false otherwise
*/
private boolean isProperlyPolymorphic(final VariantContext vc) {
return ( vc != null && !vc.isSymbolic() );
return ( vc != null &&
!vc.getAlternateAlleles().isEmpty() &&
(!vc.isBiallelic() ||
(!vc.getAlternateAllele(0).equals(Allele.SPAN_DEL) &&
!vc.getAlternateAllele(0).equals(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED))
)
);
}
/**

View File

@ -126,14 +126,15 @@ public class ReferenceConfidenceVariantContextMerger {
final boolean isSpanningEvent = loc.getStart() != vc.getStart();
// record whether it's also a spanning deletion/event (we know this because the VariantContext type is no
// longer "symbolic" but "mixed" because there are real alleles mixed in with the symbolic non-ref allele)
sawSpanningDeletion |= ( isSpanningEvent && vc.isMixed() ) || vc.getAlternateAlleles().contains(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE);
sawSpanningDeletion |= ( isSpanningEvent && vc.isMixed() ) || vc.getAlternateAlleles().contains(Allele.SPAN_DEL) ||
vc.getAlternateAlleles().contains(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED );
sawNonSpanningEvent |= ( !isSpanningEvent && vc.isMixed() );
vcAndNewAllelePairs.add(new Pair<>(vc, isSpanningEvent ? replaceWithNoCallsAndDels(vc) : remapAlleles(vc, refAllele, finalAlleleSet)));
}
// Add <DEL> and <NON_REF> to the end if at all required in the output.
if ( sawSpanningDeletion && (sawNonSpanningEvent || !removeNonRefSymbolicAllele) ) finalAlleleSet.add(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE);
if ( sawSpanningDeletion && (sawNonSpanningEvent || !removeNonRefSymbolicAllele) ) finalAlleleSet.add(Allele.SPAN_DEL);
if (!removeNonRefSymbolicAllele) finalAlleleSet.add(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE);
final List<Allele> allelesList = new ArrayList<>(finalAlleleSet);
@ -328,7 +329,7 @@ public class ReferenceConfidenceVariantContextMerger {
if ( allele.equals(GATKVCFConstants.NON_REF_SYMBOLIC_ALLELE) )
replacement = allele;
else if ( allele.length() < vc.getReference().length() )
replacement = GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE;
replacement = Allele.SPAN_DEL;
else
replacement = Allele.NO_CALL;

View File

@ -129,7 +129,7 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
eventLength = 5;
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
Assert.assertEquals(alleles.size(),0);
Assert.assertEquals(alleles.size(),2);
}

View File

@ -100,7 +100,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
1,
Arrays.asList("ebe26077809961f53d5244643d24fd45"));
Arrays.asList("7b3153135e4f8e1d137d3f4beb46f182"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@ -112,7 +112,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals",
1,
Arrays.asList("2d36a5f996cad47e5d05fcd78f6e572e"));
Arrays.asList("4f546634213ece6f08ec9258620b92bb"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@ -190,7 +190,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testMD5s() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("83ea9f4a9aadb1218c21c9d3780e8009"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("b7c753452ab0c05f9cee538e420b87fa"));
spec.disableShadowBCF();
executeTest("testMD5s", spec);
}
@ -198,7 +198,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testBasepairResolutionOutput() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("f153cb6e986efc9b50f0b8833fe5d3da"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("bb6420ead95da4c72e76ca4bf5860ef0"));
spec.disableShadowBCF();
executeTest("testBasepairResolutionOutput", spec);
}
@ -206,7 +206,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testBreakBlocks() throws Exception {
final String cmd = baseTestString(" -L 1:69485-69791 --breakBandsAtMultiplesOf 5");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("6626ff272e7e76fba091f5bde4a1f963"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("dd31182124c4b78a8a03edb1e0cf618b"));
spec.disableShadowBCF();
executeTest("testBreakBlocks", spec);
}
@ -217,7 +217,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
"-T CombineGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.1.g.vcf -V " + privateTestDir + "spanningDel.2.g.vcf",
1,
Arrays.asList("fba48ce2bf8761366ff2cd0b45d0421f"));
Arrays.asList("58984edf9a3a92c9fc97039b97755861"));
spec.disableShadowBCF();
executeTest("testSpanningDeletions", spec);
}
@ -226,7 +226,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
public void testWrongReferenceBaseBugFix() throws Exception {
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input1.vcf"
+ " -V " + (privateTestDir + "combine-gvcf-wrong-ref-input2.vcf") + " -o %s --no_cmdline_in_header");
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("331c1a4a6a72ea1617c1697a5d945d56"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("c0fdba537399cf28b28771963e2c5174"));
spec.disableShadowBCF();
executeTest("testWrongReferenceBaseBugFix",spec);
@ -235,7 +235,7 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
@Test
public void testBasepairResolutionInput() throws Exception {
final String cmd = "-T CombineGVCFs -R " + b37KGReference + " -o %s --no_cmdline_in_header -V " + privateTestDir + "gvcf.basepairResolution.vcf";
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("207e89b5677fbf0ef4d1ff768262cf0c"));
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("6aeb88ca94cb5223f26175da72b985f2"));
spec.disableShadowBCF();
executeTest("testBasepairResolutionInput", spec);
}

View File

@ -82,7 +82,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf", b37KGReference),
1,
Arrays.asList("4dfea9a9b1a77c4c6b9edc61f9ea8da2"));
Arrays.asList("23ff3e22262929138ca1f00fc111cadf"));
executeTest("testUpdatePGT", spec);
}
@ -91,7 +91,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -V " + privateTestDir + "testUpdatePGT.vcf -A StrandAlleleCountsBySample", b37KGReference),
1,
Arrays.asList("a96b79e7c3689c8d5506083cb6d27390"));
Arrays.asList("88fa4a021e4aac9a0e48bd54b2949ece"));
executeTest("testUpdatePGT, adding StrandAlleleCountsBySample annotation", spec);
}
@ -103,7 +103,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-20,000,000", b37KGReference),
1,
Arrays.asList("bf3c1982ab6ffee410cb6a1fff6e7105"));
Arrays.asList("06b4e2589c5b903f7c51ae9968bebe77"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@ -115,7 +115,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "tetraploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
1,
Arrays.asList("47d454936dc1f17cf4c4f84f02841346"));
Arrays.asList("599394c205c1d6641b9bebabbd29e13c"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@ -127,7 +127,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "diploid-gvcf-3.vcf" +
" -L " + privateTestDir + "tetraploid-gvcfs.intervals", b37KGReference),
1,
Arrays.asList("5d79ea9de8ada8520d01284cf0c9f720"));
Arrays.asList("f7d5344a85e6d7fc2437d4253b424cb0"));
executeTest("combineSingleSamplePipelineGVCF", spec);
}
@ -139,7 +139,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" --includeNonVariantSites -L 20:10,030,000-10,033,000 -L 20:10,386,000-10,386,500", b37KGReference),
1,
Arrays.asList("d69b43cac448f45218e77308fc01e9e6"));
Arrays.asList("c9e4d1e52ee1f3a5233f1fb100f24d5e"));
executeTest("combineSingleSamplePipelineGVCF_includeNonVariants", spec);
}
@ -152,7 +152,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-20,000,000", b37KGReference),
1,
Arrays.asList("7c93d82758bfb6e7efec257ef8a46217"));
Arrays.asList("aa19980b9a525afed43e98c821114ae5"));
executeTest("combineSingleSamplePipelineGVCFHierarchical", spec);
}
@ -164,7 +164,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:sample3 " + privateTestDir + "combine.single.sample.pipeline.3.vcf" +
" -L 20:10,000,000-11,000,000 --dbsnp " + b37dbSNP132, b37KGReference),
1,
Arrays.asList("5b60a7a9575ea83407aa61123960a0cc"));
Arrays.asList("f23c9d62542a69b5cbf0e9f89fdd235d"));
executeTest("combineSingleSamplePipelineGVCF_addDbsnp", spec);
}
@ -174,7 +174,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:69485-69791 -o %s -R " + b37KGReference +
" -V " + privateTestDir + "gvcfExample1.vcf",
1,
Arrays.asList("9e59b94c84dd673b8db9d35cae7e0f68"));
Arrays.asList("d602d9e5d336798e4ccb52d2b5f91677"));
executeTest("testJustOneSample", spec);
}
@ -185,14 +185,14 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V " + privateTestDir + "gvcfExample1.vcf" +
" -V " + privateTestDir + "gvcfExample2.vcf",
1,
Arrays.asList("8407cb9a1ab34e705e5a54a0d4146d84"));
Arrays.asList("6c6d6ef90386eb6c6ed649379aac0c13"));
executeTest("testSamplesWithDifferentLs", spec);
}
@Test(enabled = true)
public void testNoPLsException() {
// Test with input files with (1) 0/0 and (2) ./.
final String md5 = "3e69805dc1c0ada0a050a65b89ecab30";
final String md5 = "d04b32cf2fa97d303ff7fdc779a653d4";
WalkerTestSpec spec1 = new WalkerTestSpec(
"-T GenotypeGVCFs --no_cmdline_in_header -L 1:1115550-1115551 -o %s -R " + hg19Reference +
" --variant " + privateTestDir + "combined_genotype_gvcf_exception.vcf",
@ -212,7 +212,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseBPResolutionString("-nda"),
1,
Arrays.asList("5a036de16b7a87626d2b76727376d9df"));
Arrays.asList("7132a43d93a9855d03b27b4b0381194c"));
executeTest("testNDA", spec);
}
@ -221,7 +221,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseBPResolutionString("-maxAltAlleles 1"),
1,
Arrays.asList("2f3e6879fa27128a8be7b067ded78966"));
Arrays.asList("07844593a4e1ff1110ef8c1de42cc290"));
executeTest("testMaxAltAlleles", spec);
}
@ -230,7 +230,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseBPResolutionString("-stand_call_conf 300 -stand_emit_conf 100"),
1,
Arrays.asList("2e4a1ad71e8fc127b594077166c0344b"));
Arrays.asList("56caad762b26479ba5e2cc99222b9030"));
executeTest("testStandardConf", spec);
}
@ -274,7 +274,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
" -V:combined2 " + privateTestDir + "combine.single.sample.pipeline.combined.vcf" +
" --uniquifySamples", b37KGReference),
1,
Arrays.asList("9a472c4e101fff4892efb9255c5cd8b3"));
Arrays.asList("ba36b36145e038e3cb004adf11bce96c"));
executeTest("testUniquifiedSamples", spec);
}
@ -446,7 +446,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
}
private static final String simpleSpanningDeletionsMD5 = "e8616a396d40b4918ad30189856ceb01";
private static final String simpleSpanningDeletionsMD5 = "1cf4ea1da40306741ec4b9a5fe1568b9";
@Test(enabled = true)
public void testSpanningDeletionsMD5() {
@ -476,7 +476,7 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.1.g.vcf -V " + privateTestDir + "spanningDel.2.g.vcf -V " + privateTestDir + "spanningDel.3.g.vcf",
1,
Arrays.asList("1c418229117bc8f148a69eda9c496309"));
Arrays.asList("0aa7ceae6af1dc4fda6732e978ace864"));
spec.disableShadowBCF();
executeTest("testMultipleSpanningDeletionsMD5", spec);
}
@ -487,6 +487,17 @@ public class GenotypeGVCFsIntegrationTest extends WalkerTest {
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.delOnly.g.vcf",
1,
Arrays.asList("02cca337e097b86c5471929036ad4b64"));
spec.disableShadowBCF();
executeTest("testSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles", spec);
}
@Test(enabled = true)
public void testDeprecatedSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T GenotypeGVCFs --no_cmdline_in_header -o %s -R " + b37KGReference +
" -V " + privateTestDir + "spanningDel.depr.delOnly.g.vcf",
1,
Arrays.asList("46169d08f93e5ff57856c7b64717314b"));
spec.disableShadowBCF();
executeTest("testSpanningDeletionDoesNotGetGenotypedWithNoOtherAlleles", spec);

View File

@ -95,7 +95,7 @@ public class VariantContextMergerUnitTest extends BaseTest {
ATCATCT = Allele.create("ATCATCT");
ATref = Allele.create("AT",true);
Anoref = Allele.create("A",false);
del = GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE;
del = Allele.SPAN_DEL;
GT = Allele.create("GT",false);
genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(hg18Reference)));
}

View File

@ -67,23 +67,6 @@ import java.util.ArrayList;
*/
public class VariantsToVCFIntegrationTest extends WalkerTest {
@Test
public void testVariantsToVCFUsingDbsnpInput() {
List<String> md5 = new ArrayList<String>();
md5.add("72e6ce7aff7dec7ca9e7580be7ddd435");
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-R " + b36KGReference +
" --variant:OldDbsnp " + GATKDataLocation + "Comparisons/Validated/dbSNP/dbsnp_129_b36.rod" +
" -T VariantsToVCF" +
" -L 1:1-30,000,000" +
" -o %s" +
" --no_cmdline_in_header",
1, // just one output file
md5);
executeTest("testVariantsToVCFUsingDbsnpInput", spec).getFirst();
}
@Test
public void testVariantsToVCFUsingGeliInput() {
List<String> md5 = new ArrayList<String>();

View File

@ -38,7 +38,7 @@ public class CramIntegrationTest extends WalkerTest {
@DataProvider(name="cramData")
public Object[][] getCRAMData() {
return new Object[][] {
{"PrintReads", "exampleBAM.bam", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"},
{"PrintReads", "exampleBAM.bam", "", "cram", "fc6e3919a8a34266c89ef66e97ceaba9"},
//{"PrintReads", "exampleCRAM.cram", "", "cram", "026ebc00c2a8f9832e37f1a6a0f53521"}, https://github.com/samtools/htsjdk/issues/148
{"PrintReads", "exampleCRAM.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},
{"PrintReads", "exampleCRAM-noindex.cram", "", "bam", "99e5f740b43594a5b8e5bc1a007719e0"},

View File

@ -44,8 +44,8 @@
<test.listeners>org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.gatk.utils.TestNGTestTransformer,org.broadinstitute.gatk.utils.GATKTextReporter,org.uncommons.reportng.HTMLReporter</test.listeners>
<!-- Version numbers for picard and htsjdk -->
<htsjdk.version>1.132</htsjdk.version>
<picard.version>1.131</picard.version>
<htsjdk.version>1.134</htsjdk.version>
<picard.version>1.133</picard.version>
</properties>
<!-- Dependency configuration (versions, etc.) -->

View File

@ -28,7 +28,6 @@ package org.broadinstitute.gatk.utils.refdata;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.tribble.Feature;
import htsjdk.tribble.annotation.Strand;
import htsjdk.tribble.dbsnp.OldDbSNPFeature;
import htsjdk.tribble.gelitext.GeliTextFeature;
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
import org.broadinstitute.gatk.utils.GenomeLoc;
@ -111,139 +110,6 @@ public class VariantContextAdaptors {
}
}
// --------------------------------------------------------------------------------------------------------------
//
// dbSNP to VariantContext
//
// --------------------------------------------------------------------------------------------------------------
/**
 * Adaptor that turns legacy (non-VCF) dbSNP records, read as {@link OldDbSNPFeature},
 * into {@link VariantContext} objects.
 */
private static class DBSnpAdaptor implements VCAdaptor {
    /** @return true if the variant type contains "single" and the location type contains "exact" */
    private static boolean isSNP(OldDbSNPFeature feature) {
        return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
    }

    /** @return true if the variant type contains "mnp" and the location type contains "range" */
    private static boolean isMNP(OldDbSNPFeature feature) {
        return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
    }

    /** @return true if the variant type contains "insertion" */
    private static boolean isInsertion(OldDbSNPFeature feature) {
        return feature.getVariantType().contains("insertion");
    }

    /** @return true if the variant type contains "deletion" */
    private static boolean isDeletion(OldDbSNPFeature feature) {
        return feature.getVariantType().contains("deletion");
    }

    /** @return true if the record is an insertion, a deletion or a complex indel */
    private static boolean isIndel(OldDbSNPFeature feature) {
        return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature);
    }

    /** @return true if the variant type contains "in-del" */
    public static boolean isComplexIndel(OldDbSNPFeature feature) {
        return feature.getVariantType().contains("in-del");
    }

    /**
     * Gets the alternate alleles: every allele returned by {@link #getAlleleList(OldDbSNPFeature)}
     * that is not equal to the record's NCBI reference base. The result is a string list with no
     * guaranteed ordering of alleles (i.e. the first alternate allele is not always going to be the
     * allele with the greatest frequency).
     *
     * @param feature the dbSNP record to read the alleles from
     * @return an alternate allele list
     */
    public static List<String> getAlternateAlleleList(OldDbSNPFeature feature) {
        final String refBase = String.valueOf(feature.getNCBIRefBase());
        final List<String> ret = new ArrayList<String>();
        for (final String allele : getAlleleList(feature)) {
            if (!allele.equals(refBase))
                ret.add(allele);
        }
        return ret;
    }

    /**
     * Gets all the observed alleles of the record, reverse-complemented when the record is not on
     * the positive strand. If the NCBI reference base is among them it is moved to the first
     * position; the remaining alleles are in no particular order.
     *
     * The returned list is always a fresh copy, so reordering it never writes back into the array
     * handed out by {@code feature.getObserved()}.
     *
     * @param feature the dbSNP record to read the alleles from
     * @return the allele list, reference first when it is present
     */
    public static List<String> getAlleleList(OldDbSNPFeature feature) {
        final boolean onPositiveStrand = feature.getStrand() == Strand.POSITIVE;

        // Copy element by element rather than wrapping with Arrays.asList: that wrapper is a
        // write-through view, and the swap below would then reorder the feature's own array.
        final List<String> alleleList = new ArrayList<String>();
        for (final String str : feature.getObserved())
            alleleList.add(onPositiveStrand ? str : SequenceUtil.reverseComplement(str));

        // put the reference first
        final String refBase = feature.getNCBIRefBase();
        if ( alleleList.size() > 0 && alleleList.contains(refBase)
                && !alleleList.get(0).equals(refBase) )
            Collections.swap(alleleList, alleleList.indexOf(refBase), 0);

        return alleleList;
    }

    /**
     * @return the feature type this adaptor handles, {@code OldDbSNPFeature}
     */
    @Override
    public Class<? extends Feature> getAdaptableFeatureType() { return OldDbSNPFeature.class; }

    /**
     * Converts a non-VCF formatted dbSNP record to a VariantContext.
     *
     * @param name  source name stored on the resulting VariantContext
     * @param input the record to convert; cast to {@link OldDbSNPFeature}
     * @param ref   reference context supplying the base just before the record's start
     * @return the converted VariantContext, or null when the reference window does not reach the
     *         base before the record, the record type is not handled, or an allele has
     *         unacceptable bases
     */
    @Override
    public VariantContext convert(String name, Object input, ReferenceContext ref) {
        OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;

        // offset, within the reference window, of the base immediately before the record's start
        int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
        if ( index < 0 )
            return null; // we weren't given enough reference context to create the VariantContext

        final byte refBaseForIndel = ref.getBases()[index];
        final boolean refBaseIsDash = dbsnp.getNCBIRefBase().equals("-");

        boolean addPaddingBase;
        if ( isSNP(dbsnp) || isMNP(dbsnp) )
            addPaddingBase = false;
        else if ( isIndel(dbsnp) || dbsnp.getVariantType().contains("mixed") )
            addPaddingBase = refBaseIsDash || GATKVariantContextUtils.requiresPaddingBase(stripNullDashes(getAlleleList(dbsnp)));
        else
            return null; // can't handle anything else

        Allele refAllele;
        if ( refBaseIsDash )
            refAllele = Allele.create(refBaseForIndel, true); // a "-" reference is represented by the preceding base alone
        else if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
            return null;
        else
            refAllele = Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + dbsnp.getNCBIRefBase(), true);

        final List<Allele> alleles = new ArrayList<Allele>();
        alleles.add(refAllele);

        // add all of the alt alleles
        for ( String alt : getAlternateAlleleList(dbsnp) ) {
            if ( Allele.wouldBeNullAllele(alt.getBytes()))
                alt = "";
            else if ( ! Allele.acceptableAlleleBases(alt) )
                return null;

            alleles.add(Allele.create((addPaddingBase ? (char)refBaseForIndel : "") + alt, false));
        }

        final VariantContextBuilder builder = new VariantContextBuilder();
        builder.source(name).id(dbsnp.getRsID());
        // with a padding base the start moves one position left; the end moves as well only when
        // the reference allele consists of a single base
        builder.loc(dbsnp.getChr(), dbsnp.getStart() - (addPaddingBase ? 1 : 0), dbsnp.getEnd() - (addPaddingBase && refAllele.length() == 1 ? 1 : 0));
        builder.alleles(alleles);
        return builder.make();
    }

    /**
     * Returns a copy of the given allele strings in which every "-" entry is replaced by the
     * empty string; all other entries are kept unchanged and in order.
     */
    private static List<String> stripNullDashes(final List<String> alleles) {
        final List<String> newAlleles = new ArrayList<String>(alleles.size());
        for ( final String allele : alleles )
            newAlleles.add(allele.equals("-") ? "" : allele);
        return newAlleles;
    }
}
// --------------------------------------------------------------------------------------------------------------
//

View File

@ -144,6 +144,6 @@ public final class GATKVCFConstants {
public final static String SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG = "ALT";
public final static String NON_REF_SYMBOLIC_ALLELE_NAME = "NON_REF";
public final static Allele NON_REF_SYMBOLIC_ALLELE = Allele.create("<"+NON_REF_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible non-ref allele at this site
public final static String SPANNING_DELETION_SYMBOLIC_ALLELE_NAME = "*:DEL";
public final static Allele SPANNING_DELETION_SYMBOLIC_ALLELE = Allele.create("<"+SPANNING_DELETION_SYMBOLIC_ALLELE_NAME+">", false); // represents any possible spanning deletion allele at this site
public final static String SPANNING_DELETION_SYMBOLIC_ALLELE_NAME_DEPRECATED = "*:DEL";
public final static Allele SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED = Allele.create("<"+SPANNING_DELETION_SYMBOLIC_ALLELE_NAME_DEPRECATED+">", false); // represents any possible spanning deletion allele at this site
}