Assign correct ambiguity code for * allele

This commit is contained in:
Ron Levine 2016-09-09 14:20:18 -04:00
parent 3b90b9c7de
commit 4ef396f72e
2 changed files with 71 additions and 6 deletions

View File

@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.fasta;
import org.broadinstitute.gatk.engine.walkers.WalkerTest; import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test; import org.testng.annotations.Test;
import java.util.Arrays; import java.util.Arrays;
@ -176,4 +177,33 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
Arrays.asList("8fd887bca9f3949f2c23c3565f7dcc1b")); Arrays.asList("8fd887bca9f3949f2c23c3565f7dcc1b"));
executeTest("test iupac", spec); executeTest("test iupac", spec);
} }
@Test
void testSpanDel() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.delOnly.starFirst.vcf -L 1:1273247 -o %s",
1,
Arrays.asList("69852222a8c9c9e1604808b62df96f8a"));
executeTest("test spanning deletion", spec);
}
@DataProvider(name = "iupacSsample")
public Object[][] getIupacSampleData() {
return new Object[][]{
{"NA1", "b5d95e28263c88b20325d7a545576ad4"},
{"NA2", "a8b4b79dea8ad1fde2c0d8ff42ca132d"},
{"NA3", "69852222a8c9c9e1604808b62df96f8a"}
};
}
@Test(dataProvider = "iupacSsample")
void testSpanDelIUPAC(final String sample, final String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b37KGReference + " --use_IUPAC_sample " + sample + " -V " + privateTestDir + "spanningDel.delOnly.starFirst.vcf -L 1:1273247 -o %s",
1,
Arrays.asList(md5));
executeTest("test spanning deletion using IUPAC codes", spec);
}
} }

View File

@ -42,12 +42,15 @@ import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature; import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants; import org.broadinstitute.gatk.utils.help.HelpConstants;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import java.util.Optional;
/** /**
@ -122,6 +125,8 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
private int deletionBasesRemaining = 0; private int deletionBasesRemaining = 0;
private static final String EMPTY_BASE = " ";
@Override @Override
public void initialize() { public void initialize() {
super.initialize(); super.initialize();
@ -159,11 +164,16 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
deletionBasesRemaining = vc.getReference().length() - 1; deletionBasesRemaining = vc.getReference().length() - 1;
// delete the next n bases, not this one // delete the next n bases, not this one
return new Pair<>(context.getLocation(), refBase); return new Pair<>(context.getLocation(), refBase);
} else if ( vc.isSimpleInsertion()) { } else if ( vc.isSimpleInsertion() || vc.isSNP() ) {
return new Pair<>(context.getLocation(), vc.getAlternateAllele(0).toString()); // Get the first alt allele that is not a spanning deletion. If none present, use the empty allele
} else if (vc.isSNP()) { final Optional<Allele> optionalAllele = getFirstNonSpanDelAltAllele(vc.getAlternateAlleles());
final String base = (iupacSample != null) ? getIUPACbase(vc.getGenotype(iupacSample), refBase) : vc.getAlternateAllele(0).toString(); final Allele allele = optionalAllele.isPresent() ? optionalAllele.get() : Allele.create(EMPTY_BASE, false);
return new Pair<>(context.getLocation(), base); if ( vc.isSimpleInsertion() ) {
return new Pair<>(context.getLocation(), allele.toString());
} else {
final String base = (iupacSample != null) ? getIUPACbase(vc.getGenotype(iupacSample), refBase) : allele.toString();
return new Pair<>(context.getLocation(), base);
}
} }
} }
@ -177,6 +187,21 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
return new Pair<>(context.getLocation(), refBase); return new Pair<>(context.getLocation(), refBase);
} }
/**
* Get the first non spanning deletion (* or <*:DEL>) alt allele
* @param altAlleles the alternate alleles
* @return the first non spanning deletion allele or null
*/
private Optional<Allele> getFirstNonSpanDelAltAllele( final List<Allele> altAlleles ) {
for (final Allele allele : altAlleles) {
if (!allele.equals(Allele.SPAN_DEL) && !allele.equals(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED)) {
return Optional.of(allele);
}
}
return Optional.empty();
}
/** /**
* Mask a SNP (inserting N's in the sequence) * Mask a SNP (inserting N's in the sequence)
* *
@ -199,12 +224,22 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
* *
* @param genotype the genotype to encode * @param genotype the genotype to encode
* @param ref the reference base * @param ref the reference base
* @return non-null, non-empty String * @return non-null, non-empty String of bases
*/ */
private String getIUPACbase(final Genotype genotype, final String ref) { private String getIUPACbase(final Genotype genotype, final String ref) {
if ( genotype == null ) if ( genotype == null )
throw new IllegalStateException("The genotype is null for sample " + iupacSample); throw new IllegalStateException("The genotype is null for sample " + iupacSample);
// If have a spanning deletion, if both alleles are spanning deletions, use the empty allele. Otherwise, use the allele is not a
// spanning deletion.
if ( genotype.getAlleles().contains(Allele.SPAN_DEL) ) {
if ( genotype.isHomVar() ) {
return EMPTY_BASE;
} else {
return genotype.getAllele(0).equals(Allele.SPAN_DEL) ? genotype.getAllele(1).getBaseString() : genotype.getAllele(0).getBaseString();
}
}
if ( !genotype.isHet() ) if ( !genotype.isHet() )
return genotype.isHom() ? genotype.getAllele(0).getBaseString() : ref; return genotype.isHom() ? genotype.getAllele(0).getBaseString() : ref;