Don't allow N's in insertions when discovering indels. A better solution may be to use them as wildcards and merge them with compatible regular insertion alleles, but for now it's easier to ignore them. Minor refactoring of Allele.acceptableAlleleBases to support this. Added a unit test for the consensus allele counter in the presence of N's.

This commit is contained in:
Guillermo del Angel 2012-05-10 10:29:19 -04:00
parent 4f37d6d399
commit 27b1aa5dd3
4 changed files with 35 additions and 7 deletions

View File

@ -253,14 +253,14 @@ public class ConsensusAlleleCounter {
stop = loc.getStart() + dLen;
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
if (Allele.acceptableAlleleBases(refBases)) {
if (Allele.acceptableAlleleBases(refBases, false)) {
refAllele = Allele.create(refBases, true);
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
}
else continue; // don't go on with this allele if refBases are non-standard
} else {
// insertion case
if (Allele.acceptableAlleleBases(s)) {
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
altAllele = Allele.create(s, false);
stop = loc.getStart();

View File

@ -226,7 +226,11 @@ public class Allele implements Comparable<Allele> {
* @return true if the bases represent the well formatted allele
*/
public static boolean acceptableAlleleBases(String bases) {
return acceptableAlleleBases(bases.getBytes());
return acceptableAlleleBases(bases.getBytes(), true);
}
/**
 * String overload of the allele-base check: converts {@code bases} to bytes and
 * delegates to {@code acceptableAlleleBases(byte[], boolean)}.
 *
 * NOTE(review): {@code getBytes()} with no argument uses the platform default
 * charset; presumably the input is always plain ASCII base letters -- confirm.
 *
 * @param bases the allele bases as a string
 * @param allowNsAsAcceptable passed through to the byte[] overload, which rejects
 *                            'N'/'n' bases when this is false
 * @return the result of the byte[] overload for the converted bases
 */
public static boolean acceptableAlleleBases(String bases, boolean allowNsAsAcceptable) {
return acceptableAlleleBases(bases.getBytes(), allowNsAsAcceptable);
}
/**
@ -234,13 +238,22 @@ public class Allele implements Comparable<Allele> {
* @return true if the bases represent the well formatted allele
*/
public static boolean acceptableAlleleBases(byte[] bases) {
// Convenience overload: delegates to the two-argument form with allowNsAsAcceptable = true.
return acceptableAlleleBases(bases, true); // default: N bases are acceptable
}
public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) {
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
return true;
for ( int i = 0; i < bases.length; i++ ) {
switch (bases[i]) {
case 'A': case 'C': case 'G': case 'T': case 'N' : case 'a': case 'c': case 'g': case 't': case 'n' :
for (byte base : bases ) {
switch (base) {
case 'A': case 'C': case 'G': case 'T': case 'a': case 'c': case 'g': case 't':
break;
case 'N' : case 'n' :
if (allowNsAsAcceptable)
break;
else
return false;
default:
return false;
}

View File

@ -84,7 +84,7 @@ public abstract class BaseTest {
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
public static final boolean REQUIRE_NETWORK_CONNECTION = true;
public static final boolean REQUIRE_NETWORK_CONNECTION = false;
public static final String networkTempDir;
public static final File networkTempDirFile;

View File

@ -90,6 +90,21 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
Assert.assertEquals(alleles.size(),2);
alleles = getConsensusAlleles(eventLength,false,10,0.5001, altBases);
Assert.assertEquals(alleles.size(),0);
// test N's in insertions
altBases = "CCTCNTGAGA";
eventLength = 4;
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
Assert.assertEquals(alleles.size(),2);
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
altBases = "CCTCNTGAGA";
eventLength = 5;
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
Assert.assertEquals(alleles.size(),0);
}
private List<Allele> getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {