Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
9dbd0ef06a
|
|
@@ -168,7 +168,14 @@ public class ReadClipper {
|
||||||
try {
|
try {
|
||||||
GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
|
GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
|
||||||
for (ClippingOp op : getOps()) {
|
for (ClippingOp op : getOps()) {
|
||||||
clippedRead = op.apply(algorithm, clippedRead);
|
//check if the clipped read can still be clipped in the range requested
|
||||||
|
if (op.start < clippedRead.getReadLength()) {
|
||||||
|
ClippingOp fixedOperation = op;
|
||||||
|
if (op.stop > clippedRead.getReadLength())
|
||||||
|
fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1);
|
||||||
|
|
||||||
|
clippedRead = fixedOperation.apply(algorithm, clippedRead);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
wasClipped = true;
|
wasClipped = true;
|
||||||
ops.clear();
|
ops.clear();
|
||||||
|
|
|
||||||
|
|
@@ -200,6 +200,48 @@ public class ArtificialSAMUtils {
|
||||||
return rec;
|
return rec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an artificial read based on the parameters
|
||||||
|
*
|
||||||
|
* @param header the SAM header to associate the read with
|
||||||
|
* @param name the name of the read
|
||||||
|
* @param refIndex the reference index, i.e. what chromosome to associate it with
|
||||||
|
* @param alignmentStart where to start the alignment
|
||||||
|
* @param bases the sequence of the read
|
||||||
|
* @param qual the qualities of the read
|
||||||
|
* @param cigar the cigar string of the read
|
||||||
|
*
|
||||||
|
* @return the artificial read
|
||||||
|
*/
|
||||||
|
public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual, String cigar ) {
|
||||||
|
GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases, qual);
|
||||||
|
rec.setCigarString(cigar);
|
||||||
|
return rec;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an artificial read with the following default parameters:
|
||||||
|
* header:
|
||||||
|
* numberOfChromosomes = 1
|
||||||
|
* startingChromosome = 1
|
||||||
|
* chromosomeSize = 1000000
|
||||||
|
* read:
|
||||||
|
* name = "default_read"
|
||||||
|
* refIndex = 0
|
||||||
|
* alignmentStart = 1
|
||||||
|
*
|
||||||
|
* @param bases the sequence of the read
|
||||||
|
* @param qual the qualities of the read
|
||||||
|
* @param cigar the cigar string of the read
|
||||||
|
*
|
||||||
|
* @return the artificial read
|
||||||
|
*/
|
||||||
|
public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
|
||||||
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
|
||||||
|
return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 1, bases, qual, cigar);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public final static List<GATKSAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
public final static List<GATKSAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
||||||
GATKSAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
|
GATKSAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
|
||||||
GATKSAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
|
GATKSAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
|
||||||
|
|
|
||||||
|
|
@@ -24,6 +24,18 @@ public class ClipReadsTestUtils {
|
||||||
final static String BASES = "ACTG";
|
final static String BASES = "ACTG";
|
||||||
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
|
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
|
||||||
|
|
||||||
|
public static void assertEqualReads(GATKSAMRecord actual, GATKSAMRecord expected) {
|
||||||
|
// If they're both not empty, test their contents
|
||||||
|
if(!actual.isEmpty() && !expected.isEmpty()) {
|
||||||
|
Assert.assertEquals(actual.getReadBases(), expected.getReadBases());
|
||||||
|
Assert.assertEquals(actual.getBaseQualities(), expected.getBaseQualities());
|
||||||
|
Assert.assertEquals(actual.getCigarString(), expected.getCigarString());
|
||||||
|
}
|
||||||
|
// Otherwise test if they're both empty
|
||||||
|
else
|
||||||
|
Assert.assertEquals(actual.isEmpty(), expected.isEmpty());
|
||||||
|
}
|
||||||
|
|
||||||
public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
|
public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
|
||||||
// Because quals to char start at 33 for visibility
|
// Because quals to char start at 33 for visibility
|
||||||
baseQuals = subtractToArray(baseQuals, 33);
|
baseQuals = subtractToArray(baseQuals, 33);
|
||||||
|
|
@@ -48,7 +60,7 @@ public class ClipReadsTestUtils {
|
||||||
Assert.assertTrue(read.isEmpty());
|
Assert.assertTrue(read.isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
private static byte[] subtractToArray(byte[] array, int n) {
|
public static byte[] subtractToArray(byte[] array, int n) {
|
||||||
if (array == null)
|
if (array == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.TextCigarCodec;
|
import net.sf.samtools.TextCigarCodec;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
|
|
@@ -230,42 +231,25 @@ public class ReadClipperUnitTest extends BaseTest {
|
||||||
|
|
||||||
@Test(enabled = true)
|
@Test(enabled = true)
|
||||||
public void testHardClipLowQualEnds() {
|
public void testHardClipLowQualEnds() {
|
||||||
// Needs a thorough redesign
|
logger.warn("Executing testHardClipLowQualEnds");
|
||||||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
|
||||||
|
|
||||||
//Clip whole read
|
// Testing clipping that ends inside an insertion
|
||||||
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte) 64), new GATKSAMRecord(readClipper.read.getHeader()));
|
final byte[] BASES = {'A','C','G','T','A','C','G','T'};
|
||||||
|
final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
|
||||||
|
final String CIGAR = "1S1M5I1S";
|
||||||
|
|
||||||
List<TestParameter> testList = new LinkedList<TestParameter>();
|
final byte[] CLIPPED_BASES = {};
|
||||||
testList.add(new TestParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
|
final byte[] CLIPPED_QUALS = {};
|
||||||
testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
|
final String CLIPPED_CIGAR = "";
|
||||||
|
|
||||||
for (TestParameter p : testList) {
|
|
||||||
init();
|
|
||||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
|
||||||
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart),
|
|
||||||
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
|
|
||||||
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
|
|
||||||
p.cigar);
|
|
||||||
}
|
|
||||||
/* todo find a better way to test lowqual tail clipping on both sides
|
|
||||||
// Reverse Quals sequence
|
|
||||||
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
|
|
||||||
|
|
||||||
testList = new LinkedList<testParameter>();
|
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
|
||||||
testList.add(new testParameter(1,-1,0,3,"3M1H"));//clip 1 base at end
|
GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
|
||||||
testList.add(new testParameter(11,-1,0,2,"2M2H"));//clip 2 bases at end
|
|
||||||
|
ReadClipper lowQualClipper = new ReadClipper(read);
|
||||||
|
ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
|
||||||
|
|
||||||
|
|
||||||
for ( testParameter p : testList ) {
|
|
||||||
init();
|
|
||||||
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
|
|
||||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
|
||||||
testBaseQualCigar( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
|
|
||||||
BASES.substring(p.substringStart,p.substringStop).getBytes(),
|
|
||||||
QUALS.substring(p.substringStart,p.substringStop),
|
|
||||||
p.cigar );
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = false)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue