Merge branch 'master' of ssh://copper.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Menachem Fromer 2012-01-26 18:26:41 -05:00
commit 0de6aca29b
12 changed files with 172 additions and 79 deletions

View File

@ -176,6 +176,10 @@ public class LocusIteratorByState extends LocusIterator {
return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
}
public CigarOperator peekForwardOnGenome() {
return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement ).getOperator();
}
public CigarOperator stepForwardOnGenome() {
// we enter this method with readOffset = index of the last processed base on the read
// (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion
@ -455,6 +459,7 @@ public class LocusIteratorByState extends LocusIterator {
final SAMRecordState state = iterator.next(); // state object with the read/offset information
final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read
final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator
final CigarOperator nextOp = state.peekForwardOnGenome(); // next cigar operator
final int readOffset = state.getReadOffset(); // the base offset on this read
final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began.
@ -467,13 +472,13 @@ public class LocusIteratorByState extends LocusIterator {
if (op == CigarOperator.D) {
if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so
int leftAlignedStart = (eventStartOffset < 0) ? readOffset : eventStartOffset;
pile.add(new PileupElement(read, leftAlignedStart, true));
pile.add(new PileupElement(read, leftAlignedStart, true, nextOp == CigarOperator.I, false));
size++;
nDeletions++;
}
} else {
if (!filterBaseInRead(read, location.getStart())) {
pile.add(new PileupElement(read, readOffset, false));
pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.I, op == CigarOperator.S));
size++;
}
}

View File

@ -212,7 +212,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
public class BAQedPileupElement extends PileupElement {
public BAQedPileupElement( final PileupElement PE ) {
super(PE.getRead(), PE.getOffset(), PE.isDeletion());
super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeInsertion(), PE.isSoftClipped());
}
@Override

View File

@ -159,7 +159,7 @@ public class CycleCovariate implements StandardCovariate {
}
}
else {
throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform());
throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid");
}
}

View File

@ -177,7 +177,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
for (int i = 0; i < reads.size(); i++) {
GATKSAMRecord read = reads.get(i);
int offset = offsets.get(i);
pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D));
pileup.add(createNewPileupElement(read, offset, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
}
return pileup;
@ -196,7 +196,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
for (GATKSAMRecord read : reads) {
pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D));
pileup.add(createNewPileupElement(read, offset, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
}
return pileup;
@ -204,7 +204,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
protected abstract AbstractReadBackedPileup<RBP, PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion);
protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped);
// --------------------------------------------------------
//

View File

@ -31,6 +31,8 @@ import java.util.Arrays;
* Time: 2:57:55 PM
* To change this template use File | Settings | File Templates.
*/
// Extended events are slated for removal
public class ExtendedEventPileupElement extends PileupElement {
public enum Type {
NOEVENT, DELETION, INSERTION
@ -46,7 +48,7 @@ public class ExtendedEventPileupElement extends PileupElement {
public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) {
super(read, offset, type == Type.DELETION);
super(read, offset, type == Type.DELETION, false, false); // extended events are slated for removal
this.read = read;
this.offset = offset;
this.eventLength = eventLength;

View File

@ -23,47 +23,46 @@ public class PileupElement implements Comparable<PileupElement> {
protected final GATKSAMRecord read;
protected final int offset;
protected final boolean isDeletion;
protected final boolean isBeforeInsertion;
protected final boolean isSoftClipped;
/**
* Creates a new pileup element.
*
* @param read the read we are adding to the pileup
* @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
* @param isDeletion whether or not this base is a deletion
* @param read the read we are adding to the pileup
* @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
* @param isDeletion whether or not this base is a deletion
* @param isBeforeInsertion whether or not this base is before an insertion
* @param isSoftClipped whether or not this base was softclipped
*/
@Requires({
"read != null",
"offset >= -1",
"offset <= read.getReadLength()"})
public PileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeInsertion, final boolean isSoftClipped) {
if (offset < 0 && isDeletion)
throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset");
this.read = read;
this.offset = offset;
this.isDeletion = isDeletion;
this.isBeforeInsertion = isBeforeInsertion;
this.isSoftClipped = isSoftClipped;
}
// /**
// * Creates a NON DELETION pileup element.
// *
// * use this constructor only for insertions and matches/mismatches.
// * @param read the read we are adding to the pileup
// * @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions)
// */
// @Requires({
// "read != null",
// "offset >= -1",
// "offset <= read.getReadLength()"})
// public PileupElement( GATKSAMRecord read, int offset ) {
// this(read, offset, false);
// }
//
public boolean isDeletion() {
return isDeletion;
}
public boolean isBeforeInsertion() {
return isBeforeInsertion;
}
public boolean isSoftClipped() {
return isSoftClipped;
}
public boolean isInsertionAtBeginningOfRead() {
return offset == -1;
}

View File

@ -96,7 +96,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
}
@Override
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped) {
throw new UnsupportedOperationException("Not enough information provided to create a new pileup element");
}

View File

@ -71,7 +71,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
}
@Override
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) {
return new PileupElement(read, offset, isDeletion);
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped) {
return new PileupElement(read, offset, isDeletion, isBeforeInsertion, isSoftClipped);
}
}

View File

@ -361,10 +361,10 @@ public class ArtificialSAMUtils {
final GATKSAMRecord left = pair.get(0);
final GATKSAMRecord right = pair.get(1);
pileupElements.add(new PileupElement(left, pos - leftStart, false));
pileupElements.add(new PileupElement(left, pos - leftStart, false, false, false));
if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) {
pileupElements.add(new PileupElement(right, pos - rightStart, false));
pileupElements.add(new PileupElement(right, pos - rightStart, false, false, false));
}
}

View File

@ -43,7 +43,10 @@ import java.util.Map;
*
*/
public class GATKSAMRecord extends BAMRecord {
public static final String REDUCED_READ_CONSENSUS_TAG = "RR";
// ReduceReads specific attribute tags
public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OS"; // reads that are clipped may use this attribute to keep track of their original alignment start
public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end
// the SAMRecord data we're caching
private String mReadString = null;
@ -321,6 +324,36 @@ public class GATKSAMRecord extends BAMRecord {
return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ;
}
/**
* Determines the original alignment start of a previously clipped read.
*
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
*
* @return the alignment start of a read before it was clipped
*/
public int getOriginalAlignmentStart() {
int originalAlignmentStart = getUnclippedStart();
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT);
if (alignmentShift != null)
originalAlignmentStart += alignmentShift;
return originalAlignmentStart;
}
/**
* Determines the original alignment end of a previously clipped read.
*
* This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end
*
* @return the alignment end of a read before it was clipped
*/
public int getOriginalAlignmentEnd() {
int originalAlignmentEnd = getUnclippedEnd();
Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT);
if (alignmentShift != null)
originalAlignmentEnd -= alignmentShift;
return originalAlignmentEnd;
}
/**
* Creates an empty GATKSAMRecord with the read's header, read group and mate
* information, but empty (not-null) fields:
@ -363,4 +396,21 @@ public class GATKSAMRecord extends BAMRecord {
return emptyRead;
}
/**
* Shallow copy of everything, except for the attribute list and the temporary attributes.
* A new list of the attributes is created for both, but the attributes themselves are copied by reference.
* This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway.
*
* @return a shallow copy of the GATKSAMRecord
* @throws CloneNotSupportedException
*/
@Override
public Object clone() throws CloneNotSupportedException {
final GATKSAMRecord clone = (GATKSAMRecord) super.clone();
if (temporaryAttributes != null) {
for (Object attribute : temporaryAttributes.keySet())
clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute));
}
return clone;
}
}

View File

@ -0,0 +1,83 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
public class GATKSAMRecordUnitTest extends BaseTest {
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
@BeforeClass
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
read.setReadUnmappedFlag(true);
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
}
@Test
public void testReducedReads() {
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
for (int i = 0; i < reducedRead.getReadLength(); i++) {
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
}
}
@Test
public void testReducedReadPileupElement() {
PileupElement readp = new PileupElement(read, 0, false);
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false);
Assert.assertFalse(readp.getRead().isReducedRead());
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
}
@Test
public void testGetOriginalAlignments() {
final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'};
final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 };
GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M");
// A regular read with all matches
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
// Alignment start shifted
int alignmentShift = 2;
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, alignmentShift);
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd());
// Both alignments shifted
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, alignmentShift);
Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart());
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
// Alignment end shifted
read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null);
Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart());
Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd());
}
}

View File

@ -1,57 +1,11 @@
package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
public class ReadUtilsUnitTest extends BaseTest {
GATKSAMRecord read, reducedRead;
final static String BASES = "ACTG";
final static String QUALS = "!+5?";
final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1};
final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets
@BeforeTest
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
read.setReadUnmappedFlag(true);
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length());
reducedRead.setReadBases(BASES.getBytes());
reducedRead.setBaseQualityString(QUALS);
reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG);
}
@Test
public void testReducedReads() {
Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read");
Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read");
Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read");
for (int i = 0; i < reducedRead.getReadLength(); i++) {
Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i);
}
}
@Test
public void testReducedReadPileupElement() {
PileupElement readp = new PileupElement(read, 0, false);
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false);
Assert.assertFalse(readp.getRead().isReducedRead());
Assert.assertTrue(reducedreadp.getRead().isReducedRead());
Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]);
Assert.assertEquals(reducedreadp.getQual(), readp.getQual());
}
@Test
public void testGetAdaptorBoundary() {
final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};