The CachingIndexedFastaSequenceFile now automatically converts IUPAC bases to Ns and errors out on other non-standard bases.

This way walkers won't see anything except the standard bases plus Ns in the reference.
Added option to turn off this feature (to maintain backwards compatibility).

As part of this commit I cleaned up the BaseUtils code by adding a Base enum and removing all of the static indexes for
each of the bases.  This uncovered a bug in the way the DepthOfCoverage walker counts deletions (it was counting Ns instead!) that isn't covered by tests.  Fortunately that walker is being deprecated soon...
This commit is contained in:
Eric Banks 2013-01-16 10:22:43 -05:00
parent 4fb3e48099
commit 392b5cbcdf
13 changed files with 153 additions and 92 deletions

View File

@ -95,9 +95,9 @@ public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnota
for ( byte base : ref.getBases() ) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
if ( baseIndex == BaseUtils.gIndex || baseIndex == BaseUtils.cIndex )
if ( baseIndex == BaseUtils.Base.G.ordinal() || baseIndex == BaseUtils.Base.C.ordinal() )
gc++;
else if ( baseIndex == BaseUtils.aIndex || baseIndex == BaseUtils.tIndex )
else if ( baseIndex == BaseUtils.Base.A.ordinal() || baseIndex == BaseUtils.Base.T.ordinal() )
at++;
else
; // ignore

View File

@ -938,7 +938,7 @@ public class DepthOfCoverage extends LocusWalker<Map<DoCOutputType.Partition,Map
int nbases = 0;
for ( byte b : BaseUtils.EXTENDED_BASES ) {
nbases++;
if ( includeDeletions || b != BaseUtils.D ) {
if ( includeDeletions || b != BaseUtils.Base.D.base ) {
s.append((char)b);
s.append(":");
s.append(counts[BaseUtils.extendedBaseToBaseIndex(b)]);

View File

@ -223,7 +223,7 @@ public class DepthOfCoverageStats {
int total = 0;
int[] counts = countsBySample.get(s);
for ( byte base : BaseUtils.EXTENDED_BASES ) {
if ( includeDeletions || ! ( base == BaseUtils.D) ) { // note basesAreEqual assigns TRUE to (N,D) as both have simple index -1
if ( includeDeletions || ! ( base == BaseUtils.Base.D.base) ) { // note basesAreEqual assigns TRUE to (N,D) as both have simple index -1
total += counts[BaseUtils.extendedBaseToBaseIndex(base)];
}
}

View File

@ -376,7 +376,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
if ( lowerCaseSNPs ) {
sequence.append(Character.toLowerCase((char) ref.getBase()));
} else {
sequence.append((char) BaseUtils.N);
sequence.append((char) BaseUtils.Base.N.base);
}
rawSequence.append(Character.toUpperCase((char) ref.getBase()));

View File

@ -111,8 +111,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
private Pair<VariantContext,VariantContext> getData1() {
Allele reference_A = Allele.create(BaseUtils.A,true);
Allele alt_C = Allele.create(BaseUtils.C);
Allele reference_A = Allele.create(BaseUtils.Base.A.base,true);
Allele alt_C = Allele.create(BaseUtils.Base.C.base);
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C));
@ -160,9 +160,9 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
private Pair<VariantContext,VariantContext> getData2() {
Allele reference_A = Allele.create(BaseUtils.A,true);
Allele alt_C = Allele.create(BaseUtils.C);
Allele alt_T = Allele.create(BaseUtils.T);
Allele reference_A = Allele.create(BaseUtils.Base.A.base,true);
Allele alt_C = Allele.create(BaseUtils.Base.C.base);
Allele alt_T = Allele.create(BaseUtils.Base.T.base);
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_T));
@ -213,10 +213,10 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
private Pair<VariantContext,VariantContext> getData3() {
Allele reference_ACT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C,BaseUtils.T},true);
Allele alt_AC = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C});
Allele alt_A = Allele.create(BaseUtils.A);
Allele alt_ATT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.T,BaseUtils.T});
Allele reference_ACT = Allele.create(new byte[]{BaseUtils.Base.A.base,BaseUtils.Base.C.base,BaseUtils.Base.T.base},true);
Allele alt_AC = Allele.create(new byte[]{BaseUtils.Base.A.base,BaseUtils.Base.C.base});
Allele alt_A = Allele.create(BaseUtils.Base.A.base);
Allele alt_ATT = Allele.create(new byte[]{BaseUtils.Base.A.base,BaseUtils.Base.T.base,BaseUtils.Base.T.base});
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_ACT,alt_ATT));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(alt_A,alt_A));
@ -267,9 +267,9 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
private Pair<VariantContext,VariantContext> getData4() {
Allele reference_A = Allele.create(BaseUtils.A,true);
Allele alt_C = Allele.create(BaseUtils.C);
Allele alt_T = Allele.create(BaseUtils.T);
Allele reference_A = Allele.create(BaseUtils.Base.A.base,true);
Allele alt_C = Allele.create(BaseUtils.Base.C.base);
Allele alt_T = Allele.create(BaseUtils.Base.T.base);
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL));
@ -316,9 +316,9 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
private Pair<VariantContext,VariantContext> getData5() {
Allele reference_A = Allele.create(BaseUtils.A,true);
Allele alt_C = Allele.create(BaseUtils.C);
Allele alt_T = Allele.create(BaseUtils.T);
Allele reference_A = Allele.create(BaseUtils.Base.A.base,true);
Allele alt_C = Allele.create(BaseUtils.Base.C.base);
Allele alt_T = Allele.create(BaseUtils.Base.T.base);
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", new ArrayList<Allele>(0));
@ -368,8 +368,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
private List<Pair<VariantContext,VariantContext>> getData6() {
Allele reference_A = Allele.create(BaseUtils.A,true);
Allele alt_C = Allele.create(BaseUtils.C);
Allele reference_A = Allele.create(BaseUtils.Base.A.base,true);
Allele alt_C = Allele.create(BaseUtils.Base.C.base);
// site 1 -
@ -396,8 +396,8 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
Pair<VariantContext,VariantContext> testDataSite1 = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
reference_A = Allele.create(BaseUtils.A,true);
Allele alt_T = Allele.create(BaseUtils.T);
reference_A = Allele.create(BaseUtils.Base.A.base,true);
Allele alt_T = Allele.create(BaseUtils.Base.T.base);
// site 2 -
// sample 1: no-call/hom-ref
@ -421,7 +421,7 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
Pair<VariantContext,VariantContext> testDataSite2 = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
Allele alt_G = Allele.create(BaseUtils.G);
Allele alt_G = Allele.create(BaseUtils.Base.G.base);
// site 3 -
// sample 1: alleles do not match
@ -605,10 +605,10 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
public List<Pair<VariantContext,VariantContext>> getData7() {
Allele ref1 = Allele.create(BaseUtils.T,true);
Allele alt1 = Allele.create(BaseUtils.C);
Allele alt2 = Allele.create(BaseUtils.G);
Allele alt3 = Allele.create(BaseUtils.A);
Allele ref1 = Allele.create(BaseUtils.Base.T.base,true);
Allele alt1 = Allele.create(BaseUtils.Base.C.base);
Allele alt2 = Allele.create(BaseUtils.Base.G.base);
Allele alt3 = Allele.create(BaseUtils.Base.A.base);
GenomeLoc loc1 = genomeLocParser.createGenomeLoc("chr1",1,1);
VariantContextBuilder site1Eval = new VariantContextBuilder();

View File

@ -217,9 +217,9 @@ public class CoverageUtils {
private static void updateCounts(int[] counts, PileupElement e) {
if ( e.isDeletion() ) {
counts[BaseUtils.DELETION_INDEX] += e.getRepresentativeCount();
} else if ( BaseUtils.basesAreEqual((byte) 'N', e.getBase()) ) {
counts[BaseUtils.NO_CALL_INDEX] += e.getRepresentativeCount();
counts[BaseUtils.Base.D.ordinal()] += e.getRepresentativeCount();
} else if ( BaseUtils.basesAreEqual(BaseUtils.Base.N.base, e.getBase()) ) {
counts[BaseUtils.Base.N.ordinal()] += e.getRepresentativeCount();
} else {
try {
counts[BaseUtils.simpleBaseToBaseIndex(e.getBase())] += e.getRepresentativeCount();

View File

@ -86,7 +86,7 @@ public class GCContentByInterval extends LocusWalker<Long, Long> {
if (tracker == null)
return null;
int baseIndex = ref.getBaseIndex();
return (baseIndex == BaseUtils.gIndex || baseIndex == BaseUtils.cIndex) ? 1L : 0L;
return (baseIndex == BaseUtils.Base.G.ordinal() || baseIndex == BaseUtils.Base.C.ordinal()) ? 1L : 0L;
}
public Long reduce(Long toAdd, Long runningCount) {

View File

@ -33,6 +33,7 @@ import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.StringUtil;
import org.apache.log4j.Priority;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.utils.BaseUtils;
import java.io.File;
import java.io.FileNotFoundException;
@ -41,9 +42,10 @@ import java.util.Arrays;
/**
* A caching version of the IndexedFastaSequenceFile that avoids going to disk as often as the raw indexer.
*
* Thread-safe! Uses a thread-local cache
* Thread-safe! Uses a thread-local cache.
*
* Automatically upper-cases the bases coming in, unless they the flag preserveCase is explicitly set
* Automatically upper-cases the bases coming in, unless the flag preserveCase is explicitly set.
* Automatically converts IUPAC bases to Ns, unless the flag preserveIUPAC is explicitly set.
*/
public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(CachingIndexedFastaSequenceFile.class);
@ -64,10 +66,15 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
private final long cacheMissBackup;
/**
* If true, we will preserve the case of the original base in the genome, not
* If true, we will preserve the case of the original base in the genome
*/
private final boolean preserveCase;
/**
* If true, we will preserve the IUPAC bases in the genome
*/
private final boolean preserveIUPAC;
// information about checking efficiency
long cacheHits = 0;
long cacheMisses = 0;
@ -97,13 +104,15 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
* @param index the index of the fasta file, used for efficient random access
* @param cacheSize the size in bp of the cache we will use for this reader
* @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
* @param preserveIUPAC If true, we will keep the IUPAC bases in the FASTA, otherwise they are converted to Ns
*/
public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize, final boolean preserveCase) {
public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize, final boolean preserveCase, final boolean preserveIUPAC) {
super(fasta, index);
if ( cacheSize < 0 ) throw new IllegalArgumentException("cacheSize must be > 0");
this.cacheSize = cacheSize;
this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
this.preserveCase = preserveCase;
this.preserveIUPAC = preserveIUPAC;
}
/**
@ -122,19 +131,9 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
this.cacheSize = cacheSize;
this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
this.preserveCase = preserveCase;
preserveIUPAC = false;
}
// /**
// * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
// *
// * @param fasta The file to open.
// * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
// * @throws java.io.FileNotFoundException If the fasta or any of its supporting files cannot be found.
// */
// public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index) {
// this(fasta, index, DEFAULT_CACHE_SIZE);
// }
/**
* Same as general constructor but allows one to override the default cacheSize
*
@ -145,7 +144,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
* @param cacheSize the size in bp of the cache we will use for this reader
*/
public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize) {
this(fasta, index, cacheSize, false);
this(fasta, index, cacheSize, false, false);
}
/**
@ -240,6 +239,15 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
return ! isPreservingCase();
}
/**
* Is this CachingIndexedFastaReader keeping the IUPAC bases in the fasta, or is it turning them into Ns?
*
* @return true if the IUPAC bases coming from this reader are not modified
*/
public boolean isPreservingIUPAC() {
return preserveIUPAC;
}
/**
* Gets the subsequence of the contig in the range [start,stop]
*
@ -261,8 +269,9 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
cacheMisses++;
result = super.getSubsequenceAt(contig, start, stop);
if ( ! preserveCase ) StringUtil.toUpperCase(result.getBases());
if ( ! preserveIUPAC ) BaseUtils.convertIUPACtoN(result.getBases(), true);
} else {
// todo -- potential optimization is to check if contig.name == contig, as this in generally will be true
// todo -- potential optimization is to check if contig.name == contig, as this in general will be true
SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig);
if (stop > contigInfo.getSequenceLength())
@ -276,6 +285,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
// convert all of the bases in the sequence to upper case if we aren't preserving cases
if ( ! preserveCase ) StringUtil.toUpperCase(myCache.seq.getBases());
if ( ! preserveIUPAC ) BaseUtils.convertIUPACtoN(myCache.seq.getBases(), true);
} else {
cacheHits++;
}

View File

@ -31,7 +31,6 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -52,7 +51,7 @@ public class PileupElement implements Comparable<PileupElement> {
private final static EnumSet<CigarOperator> ON_GENOME_OPERATORS =
EnumSet.of(CigarOperator.M, CigarOperator.EQ, CigarOperator.X, CigarOperator.D);
public static final byte DELETION_BASE = BaseUtils.D;
public static final byte DELETION_BASE = BaseUtils.Base.D.base;
public static final byte DELETION_QUAL = (byte) 16;
public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87;
public static final byte C_FOLLOWED_BY_INSERTION_BASE = (byte) 88;

View File

@ -31,7 +31,6 @@ import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -402,13 +401,13 @@ public class AlignmentUtils {
switch (ce.getOperator()) {
case I:
if (alignPos > 0) {
if (alignment[alignPos - 1] == BaseUtils.A) {
if (alignment[alignPos - 1] == BaseUtils.Base.A.base) {
alignment[alignPos - 1] = PileupElement.A_FOLLOWED_BY_INSERTION_BASE;
} else if (alignment[alignPos - 1] == BaseUtils.C) {
} else if (alignment[alignPos - 1] == BaseUtils.Base.C.base) {
alignment[alignPos - 1] = PileupElement.C_FOLLOWED_BY_INSERTION_BASE;
} else if (alignment[alignPos - 1] == BaseUtils.T) {
} else if (alignment[alignPos - 1] == BaseUtils.Base.T.base) {
alignment[alignPos - 1] = PileupElement.T_FOLLOWED_BY_INSERTION_BASE;
} else if (alignment[alignPos - 1] == BaseUtils.G) {
} else if (alignment[alignPos - 1] == BaseUtils.Base.G.base) {
alignment[alignPos - 1] = PileupElement.G_FOLLOWED_BY_INSERTION_BASE;
}
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.variant.utils;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.Arrays;
import java.util.Random;
@ -34,42 +35,66 @@ import java.util.Random;
* BaseUtils contains some basic utilities for manipulating nucleotides.
*/
public class BaseUtils {
public final static byte A = (byte) 'A';
public final static byte C = (byte) 'C';
public final static byte G = (byte) 'G';
public final static byte T = (byte) 'T';
public final static byte N = (byte) 'N';
public final static byte D = (byte) 'D';
public enum Base {
A ((byte)'A'),
C ((byte)'C'),
G ((byte)'G'),
T ((byte)'T'),
N ((byte)'N'),
D ((byte)'D');
//
// todo -- we need a generalized base abstraction using the Base enum.
//
public byte base;
private Base(final byte base) {
this.base = base;
}
}
// todo -- add this to the generalized base abstraction using the Base enum.
public final static byte[] BASES = {'A', 'C', 'G', 'T'};
public final static byte[] EXTENDED_BASES = {'A', 'C', 'G', 'T', 'N', 'D'};
static private final int[] baseIndexMap = new int[256];
static {
Arrays.fill(baseIndexMap, -1);
baseIndexMap['A'] = 0;
baseIndexMap['a'] = 0;
baseIndexMap['*'] = 0; // the wildcard character counts as an A
baseIndexMap['C'] = 1;
baseIndexMap['c'] = 1;
baseIndexMap['G'] = 2;
baseIndexMap['g'] = 2;
baseIndexMap['T'] = 3;
baseIndexMap['t'] = 3;
baseIndexMap['A'] = Base.A.ordinal();
baseIndexMap['a'] = Base.A.ordinal();
baseIndexMap['*'] = Base.A.ordinal(); // the wildcard character counts as an A
baseIndexMap['C'] = Base.C.ordinal();
baseIndexMap['c'] = Base.C.ordinal();
baseIndexMap['G'] = Base.G.ordinal();
baseIndexMap['g'] = Base.G.ordinal();
baseIndexMap['T'] = Base.T.ordinal();
baseIndexMap['t'] = Base.T.ordinal();
}
// todo -- fix me (enums?)
public static final byte DELETION_INDEX = 4;
public static final byte NO_CALL_INDEX = 5; // (this is 'N')
public static final int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
public static final int cIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'C');
public static final int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G');
public static final int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');
static private final int[] baseIndexWithIupacMap = baseIndexMap.clone();
static {
baseIndexWithIupacMap['*'] = -1; // the wildcard character is bad
baseIndexWithIupacMap['N'] = Base.N.ordinal();
baseIndexWithIupacMap['n'] = Base.N.ordinal();
baseIndexWithIupacMap['R'] = Base.N.ordinal();
baseIndexWithIupacMap['r'] = Base.N.ordinal();
baseIndexWithIupacMap['Y'] = Base.N.ordinal();
baseIndexWithIupacMap['y'] = Base.N.ordinal();
baseIndexWithIupacMap['M'] = Base.N.ordinal();
baseIndexWithIupacMap['m'] = Base.N.ordinal();
baseIndexWithIupacMap['K'] = Base.N.ordinal();
baseIndexWithIupacMap['k'] = Base.N.ordinal();
baseIndexWithIupacMap['W'] = Base.N.ordinal();
baseIndexWithIupacMap['w'] = Base.N.ordinal();
baseIndexWithIupacMap['S'] = Base.N.ordinal();
baseIndexWithIupacMap['s'] = Base.N.ordinal();
baseIndexWithIupacMap['B'] = Base.N.ordinal();
baseIndexWithIupacMap['b'] = Base.N.ordinal();
baseIndexWithIupacMap['D'] = Base.N.ordinal();
baseIndexWithIupacMap['d'] = Base.N.ordinal();
baseIndexWithIupacMap['H'] = Base.N.ordinal();
baseIndexWithIupacMap['h'] = Base.N.ordinal();
baseIndexWithIupacMap['V'] = Base.N.ordinal();
baseIndexWithIupacMap['v'] = Base.N.ordinal();
}
// Use a fixed random seed to allow for deterministic results when using random bases
private static final Random randomNumberGen = new Random(47382911L);
@ -96,10 +121,10 @@ public class BaseUtils {
}
public static boolean isTransition(byte base1, byte base2) {
int b1 = simpleBaseToBaseIndex(base1);
int b2 = simpleBaseToBaseIndex(base2);
return b1 == 0 && b2 == 2 || b1 == 2 && b2 == 0 ||
b1 == 1 && b2 == 3 || b1 == 3 && b2 == 1;
final int b1 = simpleBaseToBaseIndex(base1);
final int b2 = simpleBaseToBaseIndex(base2);
return b1 == Base.A.ordinal() && b2 == Base.G.ordinal() || b1 == Base.G.ordinal() && b2 == Base.A.ordinal() ||
b1 == Base.C.ordinal() && b2 == Base.T.ordinal() || b1 == Base.T.ordinal() && b2 == Base.C.ordinal();
}
public static boolean isTransversion(byte base1, byte base2) {
@ -141,6 +166,19 @@ public class BaseUtils {
return base >= 'A' && base <= 'Z';
}
public static byte[] convertIUPACtoN(final byte[] bases, final boolean errorOnBadReferenceBase) {
final int length = bases.length;
for ( int i = 0; i < length; i++ ) {
final int baseIndex = baseIndexWithIupacMap[bases[i]];
if ( baseIndex == Base.N.ordinal() ) {
bases[i] = 'N';
} else if ( errorOnBadReferenceBase && baseIndex == -1 ) {
throw new UserException.BadInput("We encountered a non-standard non-IUPAC base in the provided reference: '" + bases[i] + "'");
}
}
return bases;
}
/**
* Converts a IUPAC nucleotide code to a pair of bases
*
@ -231,10 +269,10 @@ public class BaseUtils {
switch (base) {
case 'd':
case 'D':
return DELETION_INDEX;
return Base.D.ordinal();
case 'n':
case 'N':
return NO_CALL_INDEX;
return Base.N.ordinal();
default:
return simpleBaseToBaseIndex(base);

View File

@ -50,6 +50,21 @@ public class BaseUtilsUnitTest extends BaseTest {
Assert.assertTrue(MathUtils.compareDoubles(fraction, expected) == 0);
}
@Test
public void testConvertIUPACtoN() {
checkBytesAreEqual(BaseUtils.convertIUPACtoN(new byte[]{'A', 'A', 'A'}, false), new byte[]{'A', 'A', 'A'});
checkBytesAreEqual(BaseUtils.convertIUPACtoN(new byte[]{'W', 'A', 'A'}, false), new byte[]{'N', 'A', 'A'});
checkBytesAreEqual(BaseUtils.convertIUPACtoN(new byte[]{'A', 'M', 'A'}, false), new byte[]{'A', 'N', 'A'});
checkBytesAreEqual(BaseUtils.convertIUPACtoN(new byte[]{'A', 'A', 'K'}, false), new byte[]{'A', 'A', 'N'});
checkBytesAreEqual(BaseUtils.convertIUPACtoN(new byte[]{'M', 'M', 'M'}, false), new byte[]{'N', 'N', 'N'});
}
private void checkBytesAreEqual(final byte[] b1, final byte[] b2) {
for ( int i = 0; i < b1.length; i++ )
Assert.assertEquals(b1[i], b2[i]);
}
@Test
public void testTransitionTransversion() {
logger.warn("Executing testTransitionTransversion");

View File

@ -154,9 +154,9 @@ public class GenotypeLikelihoodsUnitTest {
public void testGetQualFromLikelihoodsMultiAllelic() {
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(triAllelic);
Allele ref = Allele.create(BaseUtils.A,true);
Allele alt1 = Allele.create(BaseUtils.C);
Allele alt2 = Allele.create(BaseUtils.T);
Allele ref = Allele.create(BaseUtils.Base.A.base,true);
Allele alt1 = Allele.create(BaseUtils.Base.C.base);
Allele alt2 = Allele.create(BaseUtils.Base.T.base);
List<Allele> allAlleles = Arrays.asList(ref,alt1,alt2);
List<Allele> gtAlleles = Arrays.asList(alt1,alt2);
GenotypeBuilder gtBuilder = new GenotypeBuilder();