diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java index f1ffa121b..2257cc139 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java @@ -176,6 +176,10 @@ public class LocusIteratorByState extends LocusIterator { return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement); } + public CigarOperator peekForwardOnGenome() { + return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement ).getOperator(); + } + public CigarOperator stepForwardOnGenome() { // we enter this method with readOffset = index of the last processed base on the read // (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion @@ -455,6 +459,7 @@ public class LocusIteratorByState extends LocusIterator { final SAMRecordState state = iterator.next(); // state object with the read/offset information final GATKSAMRecord read = (GATKSAMRecord) state.getRead(); // the actual read final CigarOperator op = state.getCurrentCigarOperator(); // current cigar operator + final CigarOperator nextOp = state.peekForwardOnGenome(); // next cigar operator final int readOffset = state.getReadOffset(); // the base offset on this read final int eventStartOffset = state.getReadEventStartOffset(); // this will be -1 if base is not a deletion, or if base is the first deletion in the event. Otherwise, it will give the last base before the deletion began. @@ -467,13 +472,13 @@ public class LocusIteratorByState extends LocusIterator { if (op == CigarOperator.D) { if (readInfo.includeReadsWithDeletionAtLoci()) { // only add deletions to the pileup if we are authorized to do so int leftAlignedStart = (eventStartOffset < 0) ? readOffset : eventStartOffset; - pile.add(new PileupElement(read, leftAlignedStart, true)); + pile.add(new PileupElement(read, leftAlignedStart, true, nextOp == CigarOperator.I, false)); size++; nDeletions++; } } else { if (!filterBaseInRead(read, location.getStart())) { - pile.add(new PileupElement(read, readOffset, false)); + pile.add(new PileupElement(read, readOffset, false, nextOp == CigarOperator.I, op == CigarOperator.S)); size++; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index d9ee2ba1b..5980ff356 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -212,7 +212,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC public class BAQedPileupElement extends PileupElement { public BAQedPileupElement( final PileupElement PE ) { - super(PE.getRead(), PE.getOffset(), PE.isDeletion()); + super(PE.getRead(), PE.getOffset(), PE.isDeletion(), PE.isBeforeInsertion(), PE.isSoftClipped()); } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java index 6b4fec04e..b0819ee69 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CycleCovariate.java @@ -159,7 +159,7 @@ public class CycleCovariate implements StandardCovariate { } } else { - throw new IllegalStateException("This method hasn't been implemented yet for " + read.getReadGroup().getPlatform()); + throw new UserException("The platform (" + read.getReadGroup().getPlatform() + ") associated with read group " + read.getReadGroup() + " is not a recognized platform. Implemented options are e.g. illumina, 454, and solid"); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index 1fa7101ca..f4fa9e941 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -177,7 +177,7 @@ public abstract class AbstractReadBackedPileup pileup = new UnifiedPileupElementTracker(); for (GATKSAMRecord read : reads) { - pileup.add(createNewPileupElement(read, offset, BaseUtils.simpleBaseToBaseIndex(read.getReadBases()[offset]) == BaseUtils.D)); + pileup.add(createNewPileupElement(read, offset, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important } return pileup; @@ -204,7 +204,7 @@ public abstract class AbstractReadBackedPileup createNewPileup(GenomeLoc loc, PileupElementTracker pileupElementTracker); - protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion); + protected abstract PE createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped); // -------------------------------------------------------- // diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java index 1d7e6f636..921da2a1f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ExtendedEventPileupElement.java @@ -31,6 +31,8 @@ import java.util.Arrays; * Time: 2:57:55 PM * To change this template use File | Settings | File Templates. */ + +// Extended events are slated for removal public class ExtendedEventPileupElement extends PileupElement { public enum Type { NOEVENT, DELETION, INSERTION @@ -46,7 +48,7 @@ public class ExtendedEventPileupElement extends PileupElement { public ExtendedEventPileupElement(GATKSAMRecord read, int offset, int eventLength, String eventBases, Type type) { - super(read, offset, type == Type.DELETION); + super(read, offset, type == Type.DELETION, false, false); // extended events are slated for removal this.read = read; this.offset = offset; this.eventLength = eventLength; diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java index 73f010d40..87aa31c47 100755 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElement.java @@ -23,47 +23,46 @@ public class PileupElement implements Comparable { protected final GATKSAMRecord read; protected final int offset; protected final boolean isDeletion; + protected final boolean isBeforeInsertion; + protected final boolean isSoftClipped; /** * Creates a new pileup element. * - * @param read the read we are adding to the pileup - * @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions) - * @param isDeletion whether or not this base is a deletion + * @param read the read we are adding to the pileup + * @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions) + * @param isDeletion whether or not this base is a deletion + * @param isBeforeInsertion whether or not this base is before an insertion + * @param isSoftClipped whether or not this base was softclipped */ @Requires({ "read != null", "offset >= -1", "offset <= read.getReadLength()"}) - public PileupElement(GATKSAMRecord read, int offset, boolean isDeletion) { + public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeInsertion, final boolean isSoftClipped) { if (offset < 0 && isDeletion) throw new ReviewedStingException("Pileup Element cannot create a deletion with a negative offset"); this.read = read; this.offset = offset; this.isDeletion = isDeletion; + this.isBeforeInsertion = isBeforeInsertion; + this.isSoftClipped = isSoftClipped; } - // /** -// * Creates a NON DELETION pileup element. -// * -// * use this constructor only for insertions and matches/mismatches. -// * @param read the read we are adding to the pileup -// * @param offset the position in the read for this base. All deletions must be left aligned! (-1 is only allowed for reads starting with insertions) -// */ -// @Requires({ -// "read != null", -// "offset >= -1", -// "offset <= read.getReadLength()"}) -// public PileupElement( GATKSAMRecord read, int offset ) { -// this(read, offset, false); -// } -// public boolean isDeletion() { return isDeletion; } + public boolean isBeforeInsertion() { + return isBeforeInsertion; + } + + public boolean isSoftClipped() { + return isSoftClipped; + } + public boolean isInsertionAtBeginningOfRead() { return offset == -1; } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java index bf67d1a70..641c63f6c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedExtendedEventPileupImpl.java @@ -96,7 +96,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup< } @Override - protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion) { + protected ExtendedEventPileupElement createNewPileupElement(GATKSAMRecord read, int offset, boolean isDeletion, boolean isBeforeInsertion, boolean isSoftClipped) { throw new UnsupportedOperationException("Not enough information provided to create a new pileup element"); } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index 66ddbe95d..965e74e8b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -71,7 +71,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) { - pileupElements.add(new PileupElement(right, pos - rightStart, false)); + pileupElements.add(new PileupElement(right, pos - rightStart, false, false, false)); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java index 913548ecc..f17772f40 100755 --- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java @@ -43,7 +43,10 @@ import java.util.Map; * */ public class GATKSAMRecord extends BAMRecord { - public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; + // ReduceReads specific attribute tags + public static final String REDUCED_READ_CONSENSUS_TAG = "RR"; // marks a synthetic read produced by the ReduceReads tool + public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT = "OS"; // reads that are clipped may use this attribute to keep track of their original alignment start + public static final String REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT = "OE"; // reads that are clipped may use this attribute to keep track of their original alignment end // the SAMRecord data we're caching private String mReadString = null; @@ -321,6 +324,36 @@ public class GATKSAMRecord extends BAMRecord { return (lastOperator == CigarOperator.HARD_CLIP) ? stop-1 : stop+shift-1 ; } + /** + * Determines the original alignment start of a previously clipped read. + * + * This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end + * + * @return the alignment start of a read before it was clipped + */ + public int getOriginalAlignmentStart() { + int originalAlignmentStart = getUnclippedStart(); + Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT); + if (alignmentShift != null) + originalAlignmentStart += alignmentShift; + return originalAlignmentStart; + } + + /** + * Determines the original alignment end of a previously clipped read. + * + * This is useful for reads that have been trimmed to a variant region and lost the information of it's original alignment end + * + * @return the alignment end of a read before it was clipped + */ + public int getOriginalAlignmentEnd() { + int originalAlignmentEnd = getUnclippedEnd(); + Integer alignmentShift = (Integer) getAttribute(REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT); + if (alignmentShift != null) + originalAlignmentEnd -= alignmentShift; + return originalAlignmentEnd; + } + /** * Creates an empty GATKSAMRecord with the read's header, read group and mate * information, but empty (not-null) fields: @@ -363,4 +396,21 @@ public class GATKSAMRecord extends BAMRecord { return emptyRead; } + /** + * Shallow copy of everything, except for the attribute list and the temporary attributes. + * A new list of the attributes is created for both, but the attributes themselves are copied by reference. + * This should be safe because callers should never modify a mutable value returned by any of the get() methods anyway. + * + * @return a shallow copy of the GATKSAMRecord + * @throws CloneNotSupportedException + */ + @Override + public Object clone() throws CloneNotSupportedException { + final GATKSAMRecord clone = (GATKSAMRecord) super.clone(); + if (temporaryAttributes != null) { + for (Object attribute : temporaryAttributes.keySet()) + clone.setTemporaryAttribute(attribute, temporaryAttributes.get(attribute)); + } + return clone; + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java new file mode 100755 index 000000000..317b320d3 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/sam/GATKSAMRecordUnitTest.java @@ -0,0 +1,83 @@ +package org.broadinstitute.sting.utils.sam; + +import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + + +public class GATKSAMRecordUnitTest extends BaseTest { + GATKSAMRecord read, reducedRead; + final static String BASES = "ACTG"; + final static String QUALS = "!+5?"; + final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1}; + final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets + + @BeforeClass + public void init() { + SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); + read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length()); + read.setReadUnmappedFlag(true); + read.setReadBases(new String(BASES).getBytes()); + read.setBaseQualityString(new String(QUALS)); + + reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); + reducedRead.setReadBases(BASES.getBytes()); + reducedRead.setBaseQualityString(QUALS); + reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG); + } + + @Test + public void testReducedReads() { + Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read"); + Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read"); + + Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read"); + for (int i = 0; i < reducedRead.getReadLength(); i++) { + Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); + } + } + + @Test + public void testReducedReadPileupElement() { + PileupElement readp = new PileupElement(read, 0, false); + PileupElement reducedreadp = new PileupElement(reducedRead, 0, false); + + Assert.assertFalse(readp.getRead().isReducedRead()); + + Assert.assertTrue(reducedreadp.getRead().isReducedRead()); + Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]); + Assert.assertEquals(reducedreadp.getQual(), readp.getQual()); + } + + @Test + public void testGetOriginalAlignments() { + final byte [] bases = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A'}; + final byte [] quals = {20 , 20 , 20 , 20 , 20 , 20 , 20 , 20 }; + GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(bases, quals, "6M"); + + // A regular read with all matches + Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd()); + + // Alignment start shifted + int alignmentShift = 2; + read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, alignmentShift); + Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd(), read.getOriginalAlignmentEnd()); + + // Both alignments shifted + read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, alignmentShift); + Assert.assertEquals(read.getAlignmentStart() + alignmentShift, read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd()); + + // Alignment end shifted + read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, null); + Assert.assertEquals(read.getAlignmentStart(), read.getOriginalAlignmentStart()); + Assert.assertEquals(read.getAlignmentEnd() - alignmentShift, read.getOriginalAlignmentEnd()); + + } + +} diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java index 1a8086a1b..7598f62a6 100755 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ReadUtilsUnitTest.java @@ -1,57 +1,11 @@ package org.broadinstitute.sting.utils.sam; -import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.testng.Assert; -import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; public class ReadUtilsUnitTest extends BaseTest { - GATKSAMRecord read, reducedRead; - final static String BASES = "ACTG"; - final static String QUALS = "!+5?"; - final private static byte[] REDUCED_READ_COUNTS = new byte[]{10, 20, 30, 40, 1}; - final private static byte[] REDUCED_READ_COUNTS_TAG = new byte[]{10, 10, 20, 30, -9}; // just the offsets - - @BeforeTest - public void init() { - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length()); - read.setReadUnmappedFlag(true); - read.setReadBases(new String(BASES).getBytes()); - read.setBaseQualityString(new String(QUALS)); - - reducedRead = ArtificialSAMUtils.createArtificialRead(header, "reducedRead", 0, 1, BASES.length()); - reducedRead.setReadBases(BASES.getBytes()); - reducedRead.setBaseQualityString(QUALS); - reducedRead.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, REDUCED_READ_COUNTS_TAG); - } - - @Test - public void testReducedReads() { - Assert.assertFalse(read.isReducedRead(), "isReducedRead is false for normal read"); - Assert.assertEquals(read.getReducedReadCounts(), null, "No reduced read tag in normal read"); - - Assert.assertTrue(reducedRead.isReducedRead(), "isReducedRead is true for reduced read"); - for (int i = 0; i < reducedRead.getReadLength(); i++) { - Assert.assertEquals(reducedRead.getReducedCount(i), REDUCED_READ_COUNTS[i], "Reduced read count not set to the expected value at " + i); - } - } - - @Test - public void testReducedReadPileupElement() { - PileupElement readp = new PileupElement(read, 0, false); - PileupElement reducedreadp = new PileupElement(reducedRead, 0, false); - - Assert.assertFalse(readp.getRead().isReducedRead()); - - Assert.assertTrue(reducedreadp.getRead().isReducedRead()); - Assert.assertEquals(reducedreadp.getRepresentativeCount(), REDUCED_READ_COUNTS[0]); - Assert.assertEquals(reducedreadp.getQual(), readp.getQual()); - } - @Test public void testGetAdaptorBoundary() { final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T'};