// our package package org.broadinstitute.sting.utils.baq; // the imports for unit testing. import org.testng.Assert; import org.testng.annotations.Test; import org.testng.annotations.DataProvider; import org.testng.annotations.BeforeMethod; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.walkers.qc.ValidateBAQWalker; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.broadinstitute.sting.utils.Utils; import java.io.File; import java.util.Arrays; import java.util.List; import java.util.ArrayList; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.*; /** * Basic unit test for GenomeLoc */ public class BAQUnitTest extends BaseTest { private SAMFileHeader header; private final int startChr = 1; private final int numChr = 2; private final int chrSize = 1000; IndexedFastaSequenceFile fasta = null; @BeforeMethod public void before() { header = ArtificialSAMUtils.createArtificialSamHeader(numChr, startChr, chrSize); fasta = new IndexedFastaSequenceFile(new File(hg18Reference)); } private class BAQTest { String readBases, refBases; byte[] quals, expected; String cigar; int refOffset; int pos; public BAQTest(String _refBases, String _readBases, String _quals, String _expected) { this(0, -1, null, _readBases, _refBases, _quals, _expected); } public BAQTest(int refOffset, String _refBases, String _readBases, String _quals, String _expected) { this(refOffset, -1, null, _refBases, _readBases, _quals, _expected); } public BAQTest(long pos, String cigar, String _readBases, String _quals, String _expected) { this(0, pos, cigar, null, _readBases, _quals, _expected); } public BAQTest(int _refOffset, long _pos, String _cigar, String _refBases, String _readBases, String _quals, String _expected) { refOffset = _refOffset; pos = (int)_pos; cigar = _cigar; readBases = _readBases; refBases = _refBases; quals = new byte[_quals.getBytes().length]; expected = new byte[_quals.getBytes().length]; for ( int i = 0; i < quals.length; i++) { quals[i] = (byte)(_quals.getBytes()[i] - 33); expected[i] = (byte)(_expected.getBytes()[i] - 33); } } public String toString() { return readBases; } public SAMRecord createRead() { SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, pos > 0 ? pos + (refOffset > 0 ? refOffset : 0): 1, readBases.getBytes(), quals); //if ( cigar != null ) read.setAlignmentEnd(readBases.getBytes().length + pos); read.setCigarString( cigar == null ? String.format("%dM", quals.length) : cigar); return read; } } @DataProvider(name = "data") public Object[][] createData1() { List params = new ArrayList(); params.add(new BAQTest("GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTTCTCCTGCTCCTGCGTGATCCGCTGCAG", "GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTCCTCCTGCTCCTGCGTGATCCGCTGCAG", "?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGIIHGGGIHHIIHIIHIIHGICCIGEII@IGIHCG", "?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGII410..0HIIHIIHIIHGICCIGEII@IGIHCE")); params.add(new BAQTest("GCTTTTTCTCCTCCTG", "GCTTTTCCTCCTCCTG", "IIHGGGIHHIIHHIIH", "EI410..0HIIHHIIE")); // big and complex, also does a cap from 3 to 4! params.add(new BAQTest(-3, 9999810l, "49M1I126M1I20M1I25M", "AAATTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATAG", "TTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGTAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCCACCCACCACCACGCCTGGCCTAATTTTTTTGTATTTTTAGTAGAGA", ">IHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?B3BBC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?ACBABBBABBCA@@>AAA5;=64>???B>=6497<<;;<;>2?>BA@??A6<EHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?838BC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?ACBABBBABBCA@@>AAA5;=64>???B;86497<<;;<;>2?>BA@??A6<>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9", "A?>>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9")); // raw base qualities are low -- but they shouldn't be capped params.add(new BAQTest(-3, 9999993l, "36M", "CCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATA", "CCACGCTTGGCAAAGTTTTCCGTACGTTTAGCCGAG", "33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8", "33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8")); // soft clipping // todo soft clip testing just doesn't work right now! // params.add(new BAQTest(29, 10000109l, "29S190M", // null, "GAAGGTTGAATCAAACCTTCGGTTCCAACGGATTACAGGTGTGAGCCACCGCGACCGGCCTGCTCAAGATAATTTTTAGGGCTAACTATGACATGAACCCCAAAATTCCTGTCCTCTAGATGGCAGAAACCAAGATAAAGTATCCCCACATGGCCACAAGGTTAAGCTCTTATGGACACAAAACAAGGCAGAGAAATGTCATTTGGCATTGGTTTCAGG", // "3737088:858278273772:3<=;:?;5=9@>@?>@=<>8?>@=>>?>4=5>?=5====A==@?A@=@6@A>@A?>?AA>@?AA>A?>==?AAA@@A>=A>A=?A>AA==@A?AA?>?AA?A@@C@:?A@<;::??AA==>@@?BB=>A>A?AB=???@?BBA@?BA==?A>A?BB=A:@?ABAB>>?ABB>8A@BAIGA", // "3737088:858278273772:3<=;:?;5=9@>@?>@=<>8?>@=>>?>4=5>?=5====A==@?A@=@6@A>@A?>?AA>@?AA>A?>==?AAA@@A>=A>A=?A>AA==@A?AA?>?AA?A@@C@:?A@<;::??AA==>@@?BB=>A>A?AB=???@?BBA@?BA==?A>A?BB=A:@?ABAB>>?ABB>8A@BAI>;")); // params.add(new BAQTest(30, 10000373l, "30S69M1D2M", // null, "TGAAATCCTGCCTTATAGTTCCCCTAAACCCACGTTCTATCCCCAGATACTCCCCTCTTCATTACAGAACAACAAAGAAAGACAAATTCTTAGCATCAATG", // "###############################=89>B;6<;96*>.1799>++66=:=:8=<-.9>><;9<':-+;*+::=;8=;;.::<:;=/2=70<=?-", // "###############################=89>B;6<;96*>.1799>++66=:=:8=<-.9>><;9<':-+;*+::=;8=;;.::<:;=/2=7000%%")); // params.add(new BAQTest(5, 10000109l, "5S5M", // "GAAGGTTGAA", // null, // "HHHHHHHHHH", // "HHHHHHHHHE")); // params.add(new BAQTest(10009480l, "102M1I18M1I16M1I43M1I10M1D9M1I7M1I7M1I16M1I9M1I8M1I14M2I18M", // "AGAGATGGGGTTTCGCCATGTTGTCCAGGCTGGTCTTGAACTCCTGACCTCAAGTGATCTGCCCACCTCGGCCTCCCAAAGTGCTGGGATTACACGTGTGAAACCACCATGCCTGGTCTCTTAATTTTTCNGATTCTAATAAAATTACATTCTATTTGCTGAAAGNGTACTTTAGAGTTGAAAGAAAAAGAAAGGNGTGGAACTTCCCCTAGTAAACAAGGAAAAACNTCCATGTTATTTATTGGACCTTAAAAATAGTGAAACATCTTAAGAAAAAAAATCAATCCTA", // "@HI@BA7>=AA>9@==??C???@?>:?BB@BA>B?=A@@<=B?AB???@@@@@?=?A==B@7<==>=<=>???>=@@A?<=B:5?413577/675;><;==@=<>>968;6;>????:#;=?>:3072077726/6;3719;9A=9;774771#30532676??=8::97<7144448/4425#65688821515986255/5601548355551#218>96/5/8<4/.2344/914/55553)1047;:30312:4:63556565631=:62610", // "@HI@BA7>=AA>9@==??C???@?>:?BB@BA>B?=A@@<=B?AB???@@@@@?=?A==B@7<==>=<=>???>=@@A?<=B:5?413&!7/675;><;==@=<>>96!;6;>????:#;=?>:3!72077726/6;3719;9A=9;774771#30532676??=8::&!<7144448'$!25#65687421515986255/560!548355551#218>96!5/8<4/.2344/614(%!!53)1047;:30312:4:63556565631=:62610")); List params2 = new ArrayList(); for ( BAQTest x : params ) params2.add(new Object[]{x}); return params2.toArray(new Object[][]{}); } @Test(dataProvider = "data", enabled = true) public void testBAQWithProvidedReference(BAQTest test) { if ( test.refBases != null ) { testBAQ(test, false); } } @Test(dataProvider = "data", enabled = true) public void testBAQWithCigarAndRefLookup(BAQTest test) { if ( test.cigar != null ) { testBAQ(test, true); } } public void testBAQ(BAQTest test, boolean lookupWithFasta) { BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4); // matches current samtools parameters SAMRecord read = test.createRead(); BAQ.BAQCalculationResult result; if ( lookupWithFasta && test.cigar != null ) result = baqHMM.calcBAQFromHMM(read, fasta); else result = baqHMM.calcBAQFromHMM(read, test.refBases.getBytes(), test.refOffset); System.out.println(Utils.dupString('-', 40)); System.out.println("reads : " + new String(test.readBases)); ValidateBAQWalker.printQuals(System.out, "in-quals:", test.quals, false); ValidateBAQWalker.printQuals(System.out, "bq-quals:", result.bq, false); for (int i = 0; i < test.quals.length; i++) { //result.bq[i] = baqHMM.capBaseByBAQ(result.rawQuals[i], result.bq[i], result.state[i], i); Assert.assertTrue(result.bq[i] >= baqHMM.getMinBaseQual() || test.expected[i] < baqHMM.getMinBaseQual(), "BQ < min base quality"); Assert.assertEquals(result.bq[i], test.expected[i], "Did not see the expected BAQ value at " + i); } } }