gatk-3.8/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java

128 lines
6.1 KiB
Java
Raw Normal View History

// our package
package org.broadinstitute.sting.utils.baq;
// the imports for unit testing.
import org.testng.Assert;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
import org.testng.annotations.BeforeMethod;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.qc.ValidateBAQWalker;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import java.io.File;
import java.util.Arrays;
import java.util.List;
import java.util.ArrayList;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMFileHeader;
/**
* Basic unit test for GenomeLoc
*/
public class BAQUnitTest extends BaseTest {
private SAMFileHeader header;
private final int startChr = 1;
private final int numChr = 2;
private final int chrSize = 1000;
@BeforeMethod
public void before() {
header = ArtificialSAMUtils.createArtificialSamHeader(numChr, startChr, chrSize);
}
@DataProvider(name = "data")
public Object[][] createData1() {
List<Object[]> params = new ArrayList<Object[]>();
params.add(new Object[]{0,
"GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTTCTCCTGCTCCTGCGTGATCCGCTGCAG",
"GCTGCTCCTGGTACTGCTGGATGAGGGCCTCGATGAAGCTAAGCTTTTCCTCCTGCTCCTGCGTGATCCGCTGCAG", null,
"?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGIIHGGGIHHIIHIIHIIHGICCIGEII@IGIHCG",
"?BACCBDDDFFBCFFHHFIHFEIFHIGHHGHBFEIFGIIGEGII410..0HIIHIIHIIHGICCIGEII@IGIHCE"});
params.add(new Object[]{0,
"GCTTTTTCTCCT",
"GCTTTTCCTCCT", null,
"IIHGGGIHHIIH",
"DI410..0HIID"});
// params.add(new Object[]{"GTTTTTTG",
// "GTTCCTTG",
// "IIIIIIII",
// "III!!III"});
// big and complex, also does a cap from 3 to 4!
params.add(new Object[]{-3,
"AAATTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATAG",
"TTCAAGATTTCAAAGGCTCTTAACTGCTCAAGATAATTTTTTTTTTTTGTAGACAGAGTCTTGCTGTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCTCACTGCAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACTACAGGCCACCCACCACCACGCCTGGCCTAATTTTTTTGTATTTTTAGTAGAGA", "49M1I126M1I20M1I25M",
">IHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?B3BBC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?<B@BC;CBBBAB=;A>ACBABBBABBCA@@<?>>AAA<CA@AABBABCC?BB8@<@C<>5;<A5=A;>=64>???B>=6497<<;;<;>2?>BA@??A6<<A59",
">EHFECEBDBBCBCABABAADBD?AABBACEABABC?>?B>@A@@>A?838BC?CBDBAABBBBBAABAABBABDACCCBCDAACBCBABBB:ABDBACBBDCCCCABCDCCBCC@@;?<B@BC;CBBBAB=;A>ACBABBBABBCA@@<?>>AAA<CA@AABBABCC?BB8@<@%<>5;<A5=A;>=64>???B;86497<<;;<;>2?>BA@??A6<<A59"});
// now changes
params.add(new Object[]{-3,
"CCGAGTAGCTGGGACTACAGGCACCCACCACCACGCCTGGCC",
"AGTAGCTGGGACTACAGGCACCCACCACCACGCCTG", null,
"A?>>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9",
"A?>>@>AA?@@>A?>A@?>@>>?=>?'>?=>7=?A9"});
// raw base qualities are low -- but they shouldn't be capped
params.add(new Object[]{-3,
"CCACCACGCCTGGCCAATTTTTTTGTATTTTTAGTAGAGATA",
"CCACGCTTGGCAAAGTTTTCCGTACGTTTAGCCGAG", null,
"33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8",
"33'/(7+270&4),(&&-)$&,%7$',-/61(,6?8"});
// // capping test
// params.add(new Object[]{-3,
// "TTTAGGGCTAACTATGACATGAACCCCAAAA",
// "AGGGCTAACTATGACATGAACCCCA", null,
// "!CDDEEEDDDCEEEECBCA@A@.0'",
// "!%%%%&)DDDCEEEECBCA@A@.0'"});
return params.toArray(new Object[][]{});
}
//
// todo -- add cigar strings to test, and create synthetic reads
//
@Test(dataProvider = "data", enabled = true)
public void testBAQ1(int offset, String _ref, String _query, String cigar, String _fastqQuals, String _expected) {
byte[] ref = _ref.getBytes();
byte[] query = _query.getBytes();
byte[] fastqQuals = _fastqQuals.getBytes();
byte[] expected = _expected.getBytes();
if ( cigar == null ) cigar = String.format("%dM", query.length);
BAQ baqHMM = new BAQ(1e-3, 0.1, 7, (byte)4); // matches current samtools parameters
byte[] quals = new byte[fastqQuals.length];
for ( int i = 0; i < fastqQuals.length; i++) {
quals[i] = (byte)(fastqQuals[i] - 33);
expected[i] = (byte)(expected[i] - 33);
}
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, query, quals);
read.setCigarString(cigar);
BAQ.BAQCalculationResult result = baqHMM.calcBAQFromHMM(read, ref, offset);
for (int i = 0; i < quals.length; i++) {
//result.bq[i] = baqHMM.capBaseByBAQ(result.rawQuals[i], result.bq[i], result.state[i], i);
Assert.assertTrue(result.bq[i] >= baqHMM.getMinBaseQual() || expected[i] < baqHMM.getMinBaseQual(), "BQ < min base quality");
Assert.assertEquals(result.bq[i], expected[i], "Did not see the expected BAQ value");
}
ValidateBAQWalker.printQuals(System.out, "in-quals:", quals, false);
ValidateBAQWalker.printQuals(System.out, "bq-quals:", result.bq, false);
}
}