Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Ryan Poplin 2011-12-14 17:11:56 -05:00
commit 9dbd0ef06a
4 changed files with 78 additions and 33 deletions

View File

@ -168,7 +168,14 @@ public class ReadClipper {
try {
GATKSAMRecord clippedRead = (GATKSAMRecord) read.clone();
for (ClippingOp op : getOps()) {
clippedRead = op.apply(algorithm, clippedRead);
//check if the clipped read can still be clipped in the range requested
if (op.start < clippedRead.getReadLength()) {
ClippingOp fixedOperation = op;
if (op.stop > clippedRead.getReadLength())
fixedOperation = new ClippingOp(op.start, clippedRead.getReadLength() - 1);
clippedRead = fixedOperation.apply(algorithm, clippedRead);
}
}
wasClipped = true;
ops.clear();

View File

@ -200,6 +200,48 @@ public class ArtificialSAMUtils {
return rec;
}
/**
* Create an artificial read based on the parameters
*
* @param header the SAM header to associate the read with
* @param name the name of the read
* @param refIndex the reference index, i.e. what chromosome to associate it with
* @param alignmentStart where to start the alignment
* @param bases the sequence of the read
* @param qual the qualities of the read
* @param cigar the cigar string of the read
*
* @return the artificial read
*/
public static GATKSAMRecord createArtificialRead( SAMFileHeader header, String name, int refIndex, int alignmentStart, byte[] bases, byte[] qual, String cigar ) {
GATKSAMRecord rec = createArtificialRead(header, name, refIndex, alignmentStart, bases, qual);
rec.setCigarString(cigar);
return rec;
}
/**
* Create an artificial read with the following default parameters :
* header:
* numberOfChromosomes = 1
* startingChromosome = 1
* chromosomeSize = 1000000
* read:
* name = "default_read"
* refIndex = 0
* alignmentStart = 1
*
* @param bases the sequence of the read
* @param qual the qualities of the read
* @param cigar the cigar string of the read
*
* @return the artificial read
*/
public static GATKSAMRecord createArtificialRead( byte[] bases, byte[] qual, String cigar ) {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
return ArtificialSAMUtils.createArtificialRead(header, "default_read", 0, 1, bases, qual, cigar);
}
public final static List<GATKSAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
GATKSAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
GATKSAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);

View File

@ -24,6 +24,18 @@ public class ClipReadsTestUtils {
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
public static void assertEqualReads(GATKSAMRecord actual, GATKSAMRecord expected) {
// If they're both not empty, test their contents
if(!actual.isEmpty() && !expected.isEmpty()) {
Assert.assertEquals(actual.getReadBases(), expected.getReadBases());
Assert.assertEquals(actual.getBaseQualities(), expected.getBaseQualities());
Assert.assertEquals(actual.getCigarString(), expected.getCigarString());
}
// Otherwise test if they're both empty
else
Assert.assertEquals(actual.isEmpty(), expected.isEmpty());
}
public static void testBaseQualCigar(GATKSAMRecord read, byte[] readBases, byte[] baseQuals, String cigar) {
// Because quals to char start at 33 for visibility
baseQuals = subtractToArray(baseQuals, 33);
@ -48,7 +60,7 @@ public class ClipReadsTestUtils {
Assert.assertTrue(read.isEmpty());
}
private static byte[] subtractToArray(byte[] array, int n) {
public static byte[] subtractToArray(byte[] array, int n) {
if (array == null)
return null;

View File

@ -30,6 +30,7 @@ import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.TextCigarCodec;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
@ -230,42 +231,25 @@ public class ReadClipperUnitTest extends BaseTest {
@Test(enabled = true)
public void testHardClipLowQualEnds() {
// Needs a thorough redesign
logger.warn("Executing testHardClipByReferenceCoordinates");
logger.warn("Executing testHardClipLowQualEnds");
//Clip whole read
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte) 64), new GATKSAMRecord(readClipper.read.getHeader()));
// Testing clipping that ends inside an insertion
final byte[] BASES = {'A','C','G','T','A','C','G','T'};
final byte[] QUALS = {2, 2, 2, 2, 20, 20, 20, 2};
final String CIGAR = "1S1M5I1S";
List<TestParameter> testList = new LinkedList<TestParameter>();
testList.add(new TestParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new TestParameter(11, -1, 2, 4, "2H2M"));//clip 2 bases at start
final byte[] CLIPPED_BASES = {};
final byte[] CLIPPED_QUALS = {};
final String CLIPPED_CIGAR = "";
for (TestParameter p : testList) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
ClipReadsTestUtils.testBaseQualCigar(readClipper.hardClipLowQualEnds((byte) p.inputStart),
ClipReadsTestUtils.BASES.substring(p.substringStart, p.substringStop).getBytes(),
ClipReadsTestUtils.QUALS.substring(p.substringStart, p.substringStop).getBytes(),
p.cigar);
}
/* todo find a better way to test lowqual tail clipping on both sides
// Reverse Quals sequence
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
testList = new LinkedList<testParameter>();
testList.add(new testParameter(1,-1,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(11,-1,0,2,"2M2H"));//clip 2 bases at end
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(BASES, QUALS, CIGAR);
GATKSAMRecord expected = ArtificialSAMUtils.createArtificialRead(CLIPPED_BASES, CLIPPED_QUALS, CLIPPED_CIGAR);
ReadClipper lowQualClipper = new ReadClipper(read);
ClipReadsTestUtils.assertEqualReads(lowQualClipper.hardClipLowQualEnds((byte) 2), expected);
for ( testParameter p : testList ) {
init();
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testBaseQualCigar( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
*/
}
@Test(enabled = false)