Merge pull request #1480 from broadinstitute/rhl_farm_wrong_ambig_code_star_allele

Assign correct ambiguity code for * allele
This commit is contained in:
Ron Levine 2016-09-22 13:03:04 -04:00 committed by GitHub
commit 6bfdaa37c3
2 changed files with 71 additions and 6 deletions

View File

@ -53,6 +53,7 @@ package org.broadinstitute.gatk.tools.walkers.fasta;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
@ -176,4 +177,33 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
Arrays.asList("8fd887bca9f3949f2c23c3565f7dcc1b"));
executeTest("test iupac", spec);
}
@Test
void testSpanDel() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b37KGReference + " -V " + privateTestDir + "spanningDel.delOnly.starFirst.vcf -L 1:1273247 -o %s",
1,
Arrays.asList("69852222a8c9c9e1604808b62df96f8a"));
executeTest("test spanning deletion", spec);
}
@DataProvider(name = "iupacSsample")
public Object[][] getIupacSampleData() {
return new Object[][]{
{"NA1", "b5d95e28263c88b20325d7a545576ad4"},
{"NA2", "a8b4b79dea8ad1fde2c0d8ff42ca132d"},
{"NA3", "69852222a8c9c9e1604808b62df96f8a"}
};
}
@Test(dataProvider = "iupacSsample")
void testSpanDelIUPAC(final String sample, final String md5) {
WalkerTestSpec spec = new WalkerTestSpec(
"-T FastaAlternateReferenceMaker -R " + b37KGReference + " --use_IUPAC_sample " + sample + " -V " + privateTestDir + "spanningDel.delOnly.starFirst.vcf -L 1:1273247 -o %s",
1,
Arrays.asList(md5));
executeTest("test spanning deletion using IUPAC codes", spec);
}
}

View File

@ -42,12 +42,15 @@ import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
import org.broadinstitute.gatk.utils.help.HelpConstants;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.utils.variant.GATKVCFConstants;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.Optional;
/**
@ -122,6 +125,8 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
private int deletionBasesRemaining = 0;
private static final String EMPTY_BASE = " ";
@Override
public void initialize() {
super.initialize();
@ -159,11 +164,16 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
deletionBasesRemaining = vc.getReference().length() - 1;
// delete the next n bases, not this one
return new Pair<>(context.getLocation(), refBase);
} else if ( vc.isSimpleInsertion()) {
return new Pair<>(context.getLocation(), vc.getAlternateAllele(0).toString());
} else if (vc.isSNP()) {
final String base = (iupacSample != null) ? getIUPACbase(vc.getGenotype(iupacSample), refBase) : vc.getAlternateAllele(0).toString();
return new Pair<>(context.getLocation(), base);
} else if ( vc.isSimpleInsertion() || vc.isSNP() ) {
// Get the first alt allele that is not a spanning deletion. If none present, use the empty allele
final Optional<Allele> optionalAllele = getFirstNonSpanDelAltAllele(vc.getAlternateAlleles());
final Allele allele = optionalAllele.isPresent() ? optionalAllele.get() : Allele.create(EMPTY_BASE, false);
if ( vc.isSimpleInsertion() ) {
return new Pair<>(context.getLocation(), allele.toString());
} else {
final String base = (iupacSample != null) ? getIUPACbase(vc.getGenotype(iupacSample), refBase) : allele.toString();
return new Pair<>(context.getLocation(), base);
}
}
}
@ -177,6 +187,21 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
return new Pair<>(context.getLocation(), refBase);
}
/**
* Get the first non spanning deletion (* or <*:DEL>) alt allele
* @param altAlleles the alternate alleles
* @return the first non spanning deletion allele or null
*/
private Optional<Allele> getFirstNonSpanDelAltAllele( final List<Allele> altAlleles ) {
for (final Allele allele : altAlleles) {
if (!allele.equals(Allele.SPAN_DEL) && !allele.equals(GATKVCFConstants.SPANNING_DELETION_SYMBOLIC_ALLELE_DEPRECATED)) {
return Optional.of(allele);
}
}
return Optional.empty();
}
/**
* Mask a SNP (inserting N's in the sequence)
*
@ -199,12 +224,22 @@ public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
*
* @param genotype the genotype to encode
* @param ref the reference base
* @return non-null, non-empty String
* @return non-null, non-empty String of bases
*/
private String getIUPACbase(final Genotype genotype, final String ref) {
if ( genotype == null )
throw new IllegalStateException("The genotype is null for sample " + iupacSample);
// If have a spanning deletion, if both alleles are spanning deletions, use the empty allele. Otherwise, use the allele is not a
// spanning deletion.
if ( genotype.getAlleles().contains(Allele.SPAN_DEL) ) {
if ( genotype.isHomVar() ) {
return EMPTY_BASE;
} else {
return genotype.getAllele(0).equals(Allele.SPAN_DEL) ? genotype.getAllele(1).getBaseString() : genotype.getAllele(0).getBaseString();
}
}
if ( !genotype.isHet() )
return genotype.isHom() ? genotype.getAllele(0).getBaseString() : ref;