PileupElement and LIBS cleanup

-- function to create pileup elements in AlignmentStateMachine and LIBS
-- Cleanup pileup element constructors, directing users to LIBS.createPileupFromRead() that really does the right thing
This commit is contained in:
Mark DePristo 2013-01-09 16:40:45 -05:00
parent 2f2a592c8e
commit fb9eb3d4ee
10 changed files with 76 additions and 47 deletions

View File

@ -51,6 +51,7 @@ import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -214,8 +215,7 @@ public class ArtificialReadPileupTestProvider {
read.setReadNegativeStrandFlag(false);
read.setReadGroup(sampleRG(sample));
pileupElements.add(new PileupElement(read,readOffset,false,isBeforeDeletion, false, isBeforeInsertion,false,false,altBases,Math.abs(eventLength)));
pileupElements.add(LocusIteratorByState.createPileupForReadAndOffset(read, readOffset));
}
return pileupElements;

View File

@ -124,7 +124,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testReverseTrim() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
Arrays.asList("44e9f6cf11b4efecb454cd3de8de9877"));
Arrays.asList("1e61de694b51d7c0f26da5179ee6bb0c"));
executeTest("test reverse trim", spec);
}
@ -391,7 +391,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSampleIndels1() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
Arrays.asList("5667a699a3a13474f2d1cd2d6b01cd5b"));
Arrays.asList("3d3c5691973a223209a1341272d881be"));
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(

View File

@ -35,6 +35,8 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Steps a single read along its alignment to the genome
@ -59,7 +61,7 @@ class AlignmentStateMachine {
/**
* Our read
*/
private final SAMRecord read;
private final GATKSAMRecord read;
private final Cigar cigar;
private final int nCigarElements;
private int currentCigarElementOffset = -1;
@ -86,7 +88,7 @@ class AlignmentStateMachine {
@Requires({"read != null", "read.getAlignmentStart() != -1", "read.getCigar() != null"})
public AlignmentStateMachine(final SAMRecord read) {
this.read = read;
this.read = (GATKSAMRecord)read;
this.cigar = read.getCigar();
this.nCigarElements = cigar.numCigarElements();
initializeAsLeftEdge();
@ -337,5 +339,23 @@ class AlignmentStateMachine {
return currentElement.getOperator();
}
}
/**
* Create a new PileupElement based on the current state of this element
*
* Must not be a left or right edge
*
* @return a pileup element
*/
@Ensures("result != null")
public final PileupElement makePileupElement() {
if ( isLeftEdge() || isRightEdge() )
throw new IllegalStateException("Cannot make a pileup element from an edge alignment state");
return new PileupElement(read,
getReadOffset(),
getCurrentCigarElement(),
getCurrentCigarElementOffset(),
getOffsetIntoCurrentCigarElement());
}
}

View File

@ -256,9 +256,7 @@ public class LocusIteratorByState extends LocusIterator {
nDeletions++;
}
pile.add(new PileupElement(read, state.getReadOffset(),
state.getCurrentCigarElement(), state.getCurrentCigarElementOffset(),
state.getOffsetIntoCurrentCigarElement()));
pile.add(state.makePileupElement());
size++;
if ( read.getMappingQuality() == 0 )
@ -384,4 +382,29 @@ public class LocusIteratorByState extends LocusIterator {
return new LIBSDownsamplingInfo(performDownsampling, coverage);
}
/**
* Create a pileup element for read at offset
*
* offset must correspond to a valid read offset given the read's cigar, or an IllegalStateException will be throw
*
* @param read a read
* @param offset the offset into the bases we'd like to use in the pileup
* @return a valid PileupElement with read and at offset
*/
@Ensures("result != null")
public static PileupElement createPileupForReadAndOffset(final GATKSAMRecord read, final int offset) {
if ( read == null ) throw new IllegalArgumentException("read cannot be null");
if ( offset < 0 || offset >= read.getReadLength() ) throw new IllegalArgumentException("Invalid offset " + offset + " outside of bounds 0 and " + read.getReadLength());
final AlignmentStateMachine stateMachine = new AlignmentStateMachine(read);
while ( stateMachine.stepForwardOnGenome() != null ) {
if ( stateMachine.getReadOffset() == offset )
return stateMachine.makePileupElement();
}
throw new IllegalStateException("Tried to create a pileup for read " + read + " with offset " + offset +
" but we never saw such an offset in the alignment state machine");
}
}

View File

@ -178,7 +178,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
for (int i = 0; i < reads.size(); i++) {
GATKSAMRecord read = reads.get(i);
int offset = offsets.get(i);
pileup.add(createNewPileupElement(read, offset, false, false, false, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important
}
return pileup;
@ -197,7 +197,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
UnifiedPileupElementTracker<PE> pileup = new UnifiedPileupElementTracker<PE>();
for (GATKSAMRecord read : reads) {
pileup.add(createNewPileupElement(read, offset, false, false, false, false, false, false)); // only used to create fake pileups for testing so ancillary information is not important
pileup.add(createNewPileupElement(read, offset)); // only used to create fake pileups for testing so ancillary information is not important
}
return pileup;
@ -205,8 +205,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
protected abstract AbstractReadBackedPileup<RBP, PE> createNewPileup(GenomeLoc loc, PileupElementTracker<PE> pileupElementTracker);
protected abstract PE createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip);
protected abstract PE createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip, final String nextEventBases, final int nextEventLength );
protected abstract PE createNewPileupElement(final GATKSAMRecord read, final int offset);
// --------------------------------------------------------
//

View File

@ -90,14 +90,6 @@ public class PileupElement implements Comparable<PileupElement> {
currentCigarOffset = offsetInCurrentCigar = -1;
}
@Deprecated
public PileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip) {
this(read, offset, isDeletion, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, -1);
}
//
// TODO -- make convenient testing constructor
//
public PileupElement(final GATKSAMRecord read, final int baseOffset,
final CigarElement currentElement, final int currentCigarOffset, final int offsetInCurrentCigar) {
this.read = read;
@ -107,10 +99,19 @@ public class PileupElement implements Comparable<PileupElement> {
this.offsetInCurrentCigar = offsetInCurrentCigar;
}
/**
* Create a new PileupElement that's a copy of toCopy
* @param toCopy the element we want to copy
*/
public PileupElement(final PileupElement toCopy) {
this(toCopy.read, toCopy.offset, toCopy.currentCigarElement, toCopy.currentCigarOffset, toCopy.offsetInCurrentCigar);
}
@Deprecated
public PileupElement(final GATKSAMRecord read, final int baseOffset) {
throw new UnsupportedOperationException("please use LocusIteratorByState.createPileupForReadAndOffset instead");
}
public boolean isDeletion() {
return currentCigarElement.getOperator() == CigarOperator.D;
}
@ -291,19 +292,6 @@ public class PileupElement implements Comparable<PileupElement> {
return representativeCount;
}
// public CigarElement getNextElement() {
// return ( offsetInCurrentCigar + 1 > currentCigarElement.getLength() && currentCigarOffset + 1 < read.getCigarLength()
// ? read.getCigar().getCigarElement(currentCigarOffset + 1)
// : currentCigarElement );
// }
//
// public CigarElement getPrevElement() {
// return ( offsetInCurrentCigar - 1 == 0 && currentCigarOffset - 1 > 0
// ? read.getCigar().getCigarElement(currentCigarOffset - 1)
// : currentCigarElement );
// }
public CigarElement getCurrentCigarElement() {
return currentCigarElement;
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List;
@ -76,14 +77,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
}
@Override
protected PileupElement createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip) {
return new PileupElement(read, offset, isDeletion, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, 0);
protected PileupElement createNewPileupElement(GATKSAMRecord read, int offset) {
return LocusIteratorByState.createPileupForReadAndOffset(read, offset);
}
@Override
protected PileupElement createNewPileupElement(final GATKSAMRecord read, final int offset, final boolean isDeletion, final boolean isBeforeDeletion, final boolean isAfterDeletion, final boolean isBeforeInsertion, final boolean isAfterInsertion, final boolean isNextToSoftClip, final String nextEventBases, final int nextEventLength ) {
return new PileupElement(read, offset, isDeletion, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, nextEventBases, nextEventLength);
}
}

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@ -478,10 +479,10 @@ public class ArtificialSAMUtils {
final GATKSAMRecord left = pair.get(0);
final GATKSAMRecord right = pair.get(1);
pileupElements.add(new PileupElement(left, pos - leftStart, false, false, false, false, false, false));
pileupElements.add(LocusIteratorByState.createPileupForReadAndOffset(left, pos - leftStart));
if (pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd()) {
pileupElements.add(new PileupElement(right, pos - rightStart, false, false, false, false, false, false));
pileupElements.add(LocusIteratorByState.createPileupForReadAndOffset(right, pos - rightStart));
}
}

View File

@ -94,6 +94,9 @@ public class AlignmentStateMachineUnitTest extends LocusIteratorByStateBaseTest
Assert.assertEquals(state.getLocation(genomeLocParser).size(), 1, "GenomeLoc position should have size == 1");
Assert.assertEquals(state.getLocation(genomeLocParser).getStart(), state.getGenomePosition(), "GenomeLoc position is bad");
// most tests of this functionality are in LIBS
Assert.assertNotNull(state.makePileupElement());
lastOffset = state.getReadOffset();
bpVisited++;
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.sam;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.locusiterator.LocusIteratorByState;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@ -67,8 +68,8 @@ public class GATKSAMRecordUnitTest extends BaseTest {
@Test
public void testReducedReadPileupElement() {
PileupElement readp = new PileupElement(read, 0, false, false, false, false, false, false);
PileupElement reducedreadp = new PileupElement(reducedRead, 0, false, false, false, false, false, false);
PileupElement readp = LocusIteratorByState.createPileupForReadAndOffset(read, 0);
PileupElement reducedreadp = LocusIteratorByState.createPileupForReadAndOffset(reducedRead, 0);
Assert.assertFalse(readp.getRead().isReducedRead());