diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index fe807874b..929583452 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.fasta; import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.utils.exceptions.UserException; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.Arrays; @@ -176,4 +177,33 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { Arrays.asList("8fd887bca9f3949f2c23c3565f7dcc1b")); executeTest("test iupac", spec); } + + @Test + void testSpanDel() { + + WalkerTestSpec spec = new WalkerTestSpec( + "-T FastaAlternateReferenceMaker -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.delOnly.starFirst.vcf -L 1:1273247 -o %s", + 1, + Arrays.asList("69852222a8c9c9e1604808b62df96f8a")); + executeTest("test spanning deletion", spec); + } + + @DataProvider(name = "iupacSsample") + public Object[][] getIupacSampleData() { + return new Object[][]{ + {"NA1", "b5d95e28263c88b20325d7a545576ad4"}, + {"NA2", "a8b4b79dea8ad1fde2c0d8ff42ca132d"}, + {"NA3", "69852222a8c9c9e1604808b62df96f8a"} + }; + } + + @Test(dataProvider = "iupacSsample") + void testSpanDelIUPAC(final String sample, final String md5) { + + WalkerTestSpec spec = new WalkerTestSpec( + "-T FastaAlternateReferenceMaker -R " + b37KGReference + " --use_IUPAC_sample " + sample + " -V " + privateTestDir + "spanningDel.delOnly.starFirst.vcf -L 1:1273247 -o %s", + 1, + Arrays.asList(md5)); + executeTest("test spanning deletion using IUPAC codes", spec); + } } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceMaker.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceMaker.java index 70354081f..e165a87c3 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceMaker.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/fasta/FastaAlternateReferenceMaker.java @@ -42,12 +42,15 @@ import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import org.broadinstitute.gatk.utils.help.HelpConstants; +import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.gatk.utils.variant.GATKVCFConstants; import java.util.Arrays; import java.util.List; import java.util.Set; +import java.util.Optional; /** @@ -122,6 +125,8 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker { private int deletionBasesRemaining = 0; + private static final String EMPTY_BASE = " "; + @Override public void initialize() { super.initialize(); @@ -159,11 +164,16 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker { deletionBasesRemaining = vc.getReference().length() - 1; // delete the next n bases, not this one return new Pair<>(context.getLocation(), refBase); - } else if ( vc.isSimpleInsertion()) { - return new Pair<>(context.getLocation(), vc.getAlternateAllele(0).toString()); - } else if (vc.isSNP()) { - final String base = (iupacSample != null) ? getIUPACbase(vc.getGenotype(iupacSample), refBase) : vc.getAlternateAllele(0).toString(); - return new Pair<>(context.getLocation(), base); + } else if ( vc.isSimpleInsertion() || vc.isSNP() ) { + // Get the first alt allele that is not a spanning deletion. If none present, use the empty allele + final Optional optionalAllele = getFirstNonSpanDelAltAllele(vc.getAlternateAlleles()); + final Allele allele = optionalAllele.isPresent() ? optionalAllele.get() : Allele.create(EMPTY_BASE, false); + if ( vc.isSimpleInsertion() ) { + return new Pair<>(context.getLocation(), allele.toString()); + } else { + final String base = (iupacSample != null) ? getIUPACbase(vc.getGenotype(iupacSample), refBase) : allele.toString(); + return new Pair<>(context.getLocation(), base); + } } } @@ -177,6 +187,21 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker { return new Pair<>(context.getLocation(), refBase); } + /** + * Get the first non spanning deletion (* or <*:DEL>) alt allele + * @param altAlleles the alternate alleles + * @return the first non spanning deletion allele or null + */ + private Optional getFirstNonSpanDelAltAllele( final List altAlleles ) { + for (final Allele allele : altAlleles) { + if (!allele.equals(Allele.SPAN_DEL) && !allele.equals(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED)) { + return Optional.of(allele); + } + } + + return Optional.empty(); + } + /** * Mask a SNP (inserting N's in the sequence) * @@ -199,12 +224,22 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker { * * @param genotype the genotype to encode * @param ref the reference base - * @return non-null, non-empty String + * @return non-null, non-empty String of bases */ private String getIUPACbase(final Genotype genotype, final String ref) { if ( genotype == null ) throw new IllegalStateException("The genotype is null for sample " + iupacSample); + // If have a spanning deletion, if both alleles are spanning deletions, use the empty allele. Otherwise, use the allele is not a + // spanning deletion. + if ( genotype.getAlleles().contains(Allele.SPAN_DEL) ) { + if ( genotype.isHomVar() ) { + return EMPTY_BASE; + } else { + return genotype.getAllele(0).equals(Allele.SPAN_DEL) ? genotype.getAllele(1).getBaseString() : genotype.getAllele(0).getBaseString(); + } + } + if ( !genotype.isHet() ) return genotype.isHom() ? genotype.getAllele(0).getBaseString() : ref;