Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
9dbd0ef06a
|
|
@ -168,7 +168,14 @@ public class ReadClipper {
|
|||
try {
|
||||
GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
|
||||
for (ClippingOp op : getOps()) {
|
||||
clippedRead = op.apply(algorithm, clippedRead);
|
||||
//check if the clipped read can still be clipped in the range requested
|
||||
if (op.start < clippedRead.getReadLength()) {
|
||||
ClippingOp fixedOperation = op;
|
||||
if (op.stop > clippedRead.getReadLength())
|
||||
fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1);
|
||||
|
||||
clippedRead = fixedOperation.apply(algorithm, clippedRead);
|
||||
}
|
||||
}
|
||||
wasClipped = true;
|
||||
ops.clear();
|
||||
|
|
|
|||
|
|
@ -200,6 +200,48 @@ public class ArtificialSAMUtils {
|
|||
return rec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an artificial read based on the parameters
|
||||
*
|
||||
* @param header the SAM header to associate the read with
|
||||
* @param name the name of the read
|
||||
* @param refIndex the reference index, i.e. what chromosome to associate it with
|
||||
* @param alignmentStart where to start the alignment
|
||||
* @param bases the sequence of the read
|
||||
* @param qual the qualities of the read
|
||||
* @param cigar the cigar string of the read
|
||||
*
|
||||
* @return the artificial read
|
||||
*/
|
||||
public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual, String cigar ) {
|
||||
GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases, qual);
|
||||
rec.setCigarString(cigar);
|
||||
return rec;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an artificial read with the following default parameters :
|
||||
* header:
|
||||
* numberOfChromosomes = 1
|
||||
* startingChromosome = 1
|
||||
* chromosomeSize = 1000000
|
||||
* read:
|
||||
* name = "default_read"
|
||||
* refIndex = 0
|
||||
* alignmentStart = 1
|
||||
*
|
||||
* @param bases the sequence of the read
|
||||
* @param qual the qualities of the read
|
||||
* @param cigar the cigar string of the read
|
||||
*
|
||||
* @return the artificial read
|
||||
*/
|
||||
public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
|
||||
return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 1, bases, qual, cigar);
|
||||
}
|
||||
|
||||
|
||||
public final static List<GATKSAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
||||
GATKSAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
|
||||
GATKSAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
|
||||
|
|
|
|||
|
|
@ -24,6 +24,18 @@ public class ClipReadsTestUtils {
|
|||
final static String BASES = "ACTG";
|
||||
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
|
||||
|
||||
public static void assertEqualReads(GATKSAMRecord actual, GATKSAMRecord expected) {
|
||||
// If they're both not empty, test their contents
|
||||
if(!actual.isEmpty() && !expected.isEmpty()) {
|
||||
Assert.assertEquals(actual.getReadBases(), expected.getReadBases());
|
||||
Assert.assertEquals(actual.getBaseQualities(), expected.getBaseQualities());
|
||||
Assert.assertEquals(actual.getCigarString(), expected.getCigarString());
|
||||
}
|
||||
// Otherwise test if they're both empty
|
||||
else
|
||||
Assert.assertEquals(actual.isEmpty(), expected.isEmpty());
|
||||
}
|
||||
|
||||
public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
|
||||
// Because quals to char start at 33 for visibility
|
||||
baseQuals = subtractToArray(baseQuals, 33);
|
||||
|
|
@ -48,7 +60,7 @@ public class ClipReadsTestUtils {
|
|||
Assert.assertTrue(read.isEmpty());
|
||||
}
|
||||
|
||||
private static byte[] subtractToArray(byte[] array, int n) {
|
||||
public static byte[] subtractToArray(byte[] array, int n) {
|
||||
if (array == null)
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement;
|
|||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.TextCigarCodec;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -230,42 +231,25 @@ public class ReadClipperUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = true)
|
||||
public void testHardClipLowQualEnds() {
|
||||
// Needs a thorough redesign
|
||||
logger.warn("Executing testHardClipByReferenceCoordinates");
|
||||
logger.warn("Executing testHardClipLowQualEnds");
|
||||
|
||||
//Clip whole read
|
||||
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte) 64), new GATKSAMRecord(readClipper.read.getHeader()));
|
||||
// Testing clipping that ends inside an insertion
|
||||
final byte[] BASES = {'A','C','G','T','A','C','G','T'};
|
||||
final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
|
||||
final String CIGAR = "1S1M5I1S";
|
||||
|
||||
List<TestParameter> testList = new LinkedList<TestParameter>();
|
||||
testList.add(new TestParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
|
||||
testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
|
||||
final byte[] CLIPPED_BASES = {};
|
||||
final byte[] CLIPPED_QUALS = {};
|
||||
final String CLIPPED_CIGAR = "";
|
||||
|
||||
for (TestParameter p : testList) {
|
||||
init();
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart),
|
||||
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
|
||||
p.cigar);
|
||||
}
|
||||
/* todo find a better way to test lowqual tail clipping on both sides
|
||||
// Reverse Quals sequence
|
||||
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
|
||||
|
||||
testList = new LinkedList<testParameter>();
|
||||
testList.add(new testParameter(1,-1,0,3,"3M1H"));//clip 1 base at end
|
||||
testList.add(new testParameter(11,-1,0,2,"2M2H"));//clip 2 bases at end
|
||||
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
|
||||
GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
|
||||
|
||||
ReadClipper lowQualClipper = new ReadClipper(read);
|
||||
ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
|
||||
|
||||
|
||||
for ( testParameter p : testList ) {
|
||||
init();
|
||||
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
|
||||
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
|
||||
testBaseQualCigar( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
|
||||
BASES.substring(p.substringStart,p.substringStop).getBytes(),
|
||||
QUALS.substring(p.substringStart,p.substringStop),
|
||||
p.cigar );
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
|
|
|
|||
Loading…
Reference in New Issue