From 27b1aa5dd3b8ecfc84aa96b810defc4c0aa2d7e9 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 10 May 2012 10:29:19 -0400 Subject: [PATCH] Don't allow N's in insertions when discovering indels. Maybe a better solution will be to use them as wildcards and merge them with compatible regular insertion alleles but for now it's easier to ignore them. Minor refactoring of Allele.acceptableAlleleBases to support this. Added unit test to test consensus allele counter in the presence of N's --- .../genotyper/ConsensusAlleleCounter.java | 4 ++-- .../sting/utils/variantcontext/Allele.java | 21 +++++++++++++++---- .../org/broadinstitute/sting/BaseTest.java | 2 +- .../IndelGenotypeLikelihoodsUnitTest.java | 15 +++++++++++++ 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java index e64a4f42d..1ddfd7ed1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ConsensusAlleleCounter.java @@ -253,14 +253,14 @@ public class ConsensusAlleleCounter { stop = loc.getStart() + dLen; final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen); - if (Allele.acceptableAlleleBases(refBases)) { + if (Allele.acceptableAlleleBases(refBases, false)) { refAllele = Allele.create(refBases, true); altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false); } else continue; // don't go on with this allele if refBases are non-standard } else { // insertion case - if (Allele.acceptableAlleleBases(s)) { + if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true); altAllele = Allele.create(s, false); stop = loc.getStart(); diff --git 
a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java index 52b4109fe..2ca3e0055 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Allele.java @@ -226,7 +226,11 @@ public class Allele implements Comparable { * @return true if the bases represent the well formatted allele */ public static boolean acceptableAlleleBases(String bases) { - return acceptableAlleleBases(bases.getBytes()); + return acceptableAlleleBases(bases.getBytes(), true); + } + + public static boolean acceptableAlleleBases(String bases, boolean allowNsAsAcceptable) { + return acceptableAlleleBases(bases.getBytes(), allowNsAsAcceptable); } /** @@ -234,13 +238,22 @@ public class Allele implements Comparable { * @return true if the bases represent the well formatted allele */ public static boolean acceptableAlleleBases(byte[] bases) { + return acceptableAlleleBases(bases, true); // default: N bases are acceptable + } + + public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) { if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) ) return true; - for ( int i = 0; i < bases.length; i++ ) { - switch (bases[i]) { - case 'A': case 'C': case 'G': case 'T': case 'N' : case 'a': case 'c': case 'g': case 't': case 'n' : + for (byte base : bases ) { + switch (base) { + case 'A': case 'C': case 'G': case 'T': case 'a': case 'c': case 'g': case 't': break; + case 'N' : case 'n' : + if (allowNsAsAcceptable) + break; + else + return false; default: return false; } diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index c49adf805..a415481fd 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ 
-84,7 +84,7 @@ public abstract class BaseTest { public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list"; public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list"; - public static final boolean REQUIRE_NETWORK_CONNECTION = true; + public static final boolean REQUIRE_NETWORK_CONNECTION = false; public static final String networkTempDir; public static final File networkTempDirFile; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java index e4c3b8dae..c7ef51d0c 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsUnitTest.java @@ -90,6 +90,21 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest { Assert.assertEquals(alleles.size(),2); alleles = getConsensusAlleles(eventLength,false,10,0.5001, altBases); Assert.assertEquals(alleles.size(),0); + + // test N's in insertions + altBases = "CCTCNTGAGA"; + eventLength = 4; + alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); + + Assert.assertEquals(alleles.size(),2); + Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength)); + + altBases = "CCTCNTGAGA"; + eventLength = 5; + alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases); + + Assert.assertEquals(alleles.size(),0); + } private List getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {