Don't allow N's in insertions when discovering indels. A better solution may be to treat them as wildcards and merge them with compatible regular insertion alleles, but for now it's easier to ignore them. Minor refactoring of Allele.acceptableAlleleBases to support this. Added a unit test for the consensus allele counter in the presence of N's.
This commit is contained in:
parent
4f37d6d399
commit
27b1aa5dd3
|
|
@ -253,14 +253,14 @@ public class ConsensusAlleleCounter {
|
|||
stop = loc.getStart() + dLen;
|
||||
final byte[] refBases = Arrays.copyOfRange(ref.getBases(), startIdxInReference, startIdxInReference + dLen);
|
||||
|
||||
if (Allele.acceptableAlleleBases(refBases)) {
|
||||
if (Allele.acceptableAlleleBases(refBases, false)) {
|
||||
refAllele = Allele.create(refBases, true);
|
||||
altAllele = Allele.create(Allele.NULL_ALLELE_STRING, false);
|
||||
}
|
||||
else continue; // don't go on with this allele if refBases are non-standard
|
||||
} else {
|
||||
// insertion case
|
||||
if (Allele.acceptableAlleleBases(s)) {
|
||||
if (Allele.acceptableAlleleBases(s, false)) { // don't allow N's in insertions
|
||||
refAllele = Allele.create(Allele.NULL_ALLELE_STRING, true);
|
||||
altAllele = Allele.create(s, false);
|
||||
stop = loc.getStart();
|
||||
|
|
|
|||
|
|
@ -226,7 +226,11 @@ public class Allele implements Comparable<Allele> {
|
|||
* @return true if the bases represent the well formatted allele
|
||||
*/
|
||||
public static boolean acceptableAlleleBases(String bases) {
|
||||
return acceptableAlleleBases(bases.getBytes());
|
||||
return acceptableAlleleBases(bases.getBytes(), true);
|
||||
}
|
||||
|
||||
public static boolean acceptableAlleleBases(String bases, boolean allowNsAsAcceptable) {
|
||||
return acceptableAlleleBases(bases.getBytes(), allowNsAsAcceptable);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -234,13 +238,22 @@ public class Allele implements Comparable<Allele> {
|
|||
* @return true if the bases represent the well formatted allele
|
||||
*/
|
||||
public static boolean acceptableAlleleBases(byte[] bases) {
|
||||
return acceptableAlleleBases(bases, true); // default: N bases are acceptable
|
||||
}
|
||||
|
||||
public static boolean acceptableAlleleBases(byte[] bases, boolean allowNsAsAcceptable) {
|
||||
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) || wouldBeSymbolicAllele(bases) )
|
||||
return true;
|
||||
|
||||
for ( int i = 0; i < bases.length; i++ ) {
|
||||
switch (bases[i]) {
|
||||
case 'A': case 'C': case 'G': case 'T': case 'N' : case 'a': case 'c': case 'g': case 't': case 'n' :
|
||||
for (byte base : bases ) {
|
||||
switch (base) {
|
||||
case 'A': case 'C': case 'G': case 'T': case 'a': case 'c': case 'g': case 't':
|
||||
break;
|
||||
case 'N' : case 'n' :
|
||||
if (allowNsAsAcceptable)
|
||||
break;
|
||||
else
|
||||
return false;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public abstract class BaseTest {
|
|||
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
|
||||
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
|
||||
|
||||
public static final boolean REQUIRE_NETWORK_CONNECTION = true;
|
||||
public static final boolean REQUIRE_NETWORK_CONNECTION = false;
|
||||
public static final String networkTempDir;
|
||||
public static final File networkTempDirFile;
|
||||
|
||||
|
|
|
|||
|
|
@ -90,6 +90,21 @@ public class IndelGenotypeLikelihoodsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(alleles.size(),2);
|
||||
alleles = getConsensusAlleles(eventLength,false,10,0.5001, altBases);
|
||||
Assert.assertEquals(alleles.size(),0);
|
||||
|
||||
// test N's in insertions
|
||||
altBases = "CCTCNTGAGA";
|
||||
eventLength = 4;
|
||||
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||
|
||||
Assert.assertEquals(alleles.size(),2);
|
||||
Assert.assertEquals(alleles.get(1).getBaseString(), altBases.substring(0,eventLength));
|
||||
|
||||
altBases = "CCTCNTGAGA";
|
||||
eventLength = 5;
|
||||
alleles = getConsensusAlleles(eventLength,true,10,0.1, altBases);
|
||||
|
||||
Assert.assertEquals(alleles.size(),0);
|
||||
|
||||
}
|
||||
|
||||
private List<Allele> getConsensusAlleles(int eventLength, boolean isInsertion, int minCnt, double minFraction, String altBases) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue