Cleanup FragmentPileup before main repo commit
-- removed intermediate functions. Now only the original version and the best optimized new version remain -- Moved general artificial read backed pileup creation code into ArtificialSAMUtils
This commit is contained in:
parent
166174a551
commit
502592671d
|
|
@ -14,9 +14,6 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
* Based on the original code by E. Banks
|
* Based on the original code by E. Banks
|
||||||
*
|
*
|
||||||
* TODO -- technically we could generalize this code to support a pseudo-duplicate marking
|
|
||||||
* TODO -- algorithm that could collect all duplicates into a single super pileup element
|
|
||||||
*
|
|
||||||
* Oct 21: note that the order of the oneReadPileup and twoReadPileups are not
|
* Oct 21: note that the order of the oneReadPileup and twoReadPileups are not
|
||||||
* defined. The algorithms that produce these lists are in fact producing
|
* defined. The algorithms that produce these lists are in fact producing
|
||||||
* lists of Pileup elements *NOT* sorted by alignment start position of the underlying
|
* lists of Pileup elements *NOT* sorted by alignment start position of the underlying
|
||||||
|
|
@ -30,11 +27,9 @@ public class FragmentPileup {
|
||||||
Collection<PileupElement> oneReadPile = null;
|
Collection<PileupElement> oneReadPile = null;
|
||||||
Collection<TwoReadPileupElement> twoReadPile = null;
|
Collection<TwoReadPileupElement> twoReadPile = null;
|
||||||
|
|
||||||
public enum FragmentMatchingAlgorithm {
|
protected enum FragmentMatchingAlgorithm {
|
||||||
ORIGINAL,
|
ORIGINAL,
|
||||||
FAST_V1,
|
|
||||||
skipNonOverlapping,
|
skipNonOverlapping,
|
||||||
skipNonOverlappingNotLazy
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -42,16 +37,14 @@ public class FragmentPileup {
|
||||||
* @param pileup
|
* @param pileup
|
||||||
*/
|
*/
|
||||||
public FragmentPileup(ReadBackedPileup pileup) {
|
public FragmentPileup(ReadBackedPileup pileup) {
|
||||||
//oldSlowCalculation(pileup);
|
skipNonOverlapping(pileup);
|
||||||
fastNewCalculation(pileup);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** For performance testing only */
|
||||||
protected FragmentPileup(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) {
|
protected FragmentPileup(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) {
|
||||||
switch ( algorithm ) {
|
switch ( algorithm ) {
|
||||||
case ORIGINAL: oldSlowCalculation(pileup); break;
|
case ORIGINAL: oldSlowCalculation(pileup); break;
|
||||||
case FAST_V1: fastNewCalculation(pileup); break;
|
|
||||||
case skipNonOverlapping: skipNonOverlapping(pileup); break;
|
case skipNonOverlapping: skipNonOverlapping(pileup); break;
|
||||||
case skipNonOverlappingNotLazy: skipNonOverlappingNotLazy(pileup); break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -76,58 +69,36 @@ public class FragmentPileup {
|
||||||
oneReadPile = nameMap.values();
|
oneReadPile = nameMap.values();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private final void skipNonOverlapping(final ReadBackedPileup pileup) {
|
||||||
* @param pileup
|
Map<String, PileupElement> nameMap = null;
|
||||||
*/
|
|
||||||
private final void fastNewCalculation(final ReadBackedPileup pileup) {
|
|
||||||
Map<String, PileupElement> nameMap = null; // lazy initialization
|
|
||||||
|
|
||||||
|
// build an initial map, grabbing all of the multi-read fragments
|
||||||
for ( final PileupElement p : pileup ) {
|
for ( final PileupElement p : pileup ) {
|
||||||
final SAMRecord read = p.getRead();
|
final SAMRecord read = p.getRead();
|
||||||
|
final int mateStart = read.getMateAlignmentStart();
|
||||||
|
|
||||||
switch (ReadUtils.readMightOverlapMate(read) ) {
|
if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) {
|
||||||
// we know for certain this read doesn't have an overlapping mate
|
// if we know that this read won't overlap its mate, or doesn't have one, jump out early
|
||||||
case NO: {
|
if ( oneReadPile == null ) oneReadPile = new ArrayList<PileupElement>(pileup.size()); // lazy init
|
||||||
addToOnePile(p);
|
oneReadPile.add(p);
|
||||||
break;
|
} else {
|
||||||
}
|
// the read might overlap its mate, or is the rightmost read of a pair
|
||||||
|
final String readName = p.getRead().getReadName();
|
||||||
// we know that we overlap our mate, so put the read in the nameMap in
|
final PileupElement pe1 = nameMap == null ? null : nameMap.get(readName);
|
||||||
// case our mate shows up
|
if ( pe1 != null ) {
|
||||||
case LEFT_YES: {
|
// assumes we have at most 2 reads per fragment
|
||||||
nameMap = addToNameMap(nameMap, p);
|
if ( twoReadPile == null ) twoReadPile = new ArrayList<TwoReadPileupElement>(); // lazy init
|
||||||
break;
|
twoReadPile.add(new TwoReadPileupElement(pe1, p));
|
||||||
}
|
nameMap.remove(readName);
|
||||||
|
} else {
|
||||||
// read starts at the same position, so we are looking at either the first or
|
if ( nameMap == null ) nameMap = new HashMap<String, PileupElement>(pileup.size()); // lazy init
|
||||||
// the second read. In the first, add it to the map, in the second grab it
|
nameMap.put(readName, p);
|
||||||
// from the map and create a fragment
|
|
||||||
case SAME_START: {
|
|
||||||
final PileupElement pe1 = getFromNameMap(nameMap, p);
|
|
||||||
if ( pe1 != null ) {
|
|
||||||
addToTwoPile(pe1, p);
|
|
||||||
nameMap.remove(p.getRead().getReadName());
|
|
||||||
} else {
|
|
||||||
nameMap = addToNameMap(nameMap, p);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// in this case we need to see if our mate is already present, and if so
|
|
||||||
// grab the read from the list
|
|
||||||
case RIGHT_MAYBE: {
|
|
||||||
final PileupElement pe1 = getFromNameMap(nameMap, p);
|
|
||||||
if ( pe1 != null ) {
|
|
||||||
addToTwoPile(pe1, p);
|
|
||||||
nameMap.remove(p.getRead().getReadName());
|
|
||||||
} else {
|
|
||||||
addToOnePile(p);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// add all of the reads that are potentially overlapping but whose mate never showed
|
||||||
|
// up to the oneReadPile
|
||||||
if ( nameMap != null && ! nameMap.isEmpty() ) {
|
if ( nameMap != null && ! nameMap.isEmpty() ) {
|
||||||
if ( oneReadPile == null )
|
if ( oneReadPile == null )
|
||||||
oneReadPile = nameMap.values();
|
oneReadPile = nameMap.values();
|
||||||
|
|
@ -136,90 +107,6 @@ public class FragmentPileup {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* @param pileup
|
|
||||||
*/
|
|
||||||
private final void skipNonOverlappingNotLazy(final ReadBackedPileup pileup) {
|
|
||||||
oneReadPile = new ArrayList<PileupElement>(pileup.size());
|
|
||||||
twoReadPile = new ArrayList<TwoReadPileupElement>();
|
|
||||||
final Map<String, PileupElement> nameMap = new HashMap<String, PileupElement>(pileup.size());
|
|
||||||
|
|
||||||
// build an initial map, grabbing all of the multi-read fragments
|
|
||||||
for ( final PileupElement p : pileup ) {
|
|
||||||
// if we know that this read won't overlap its mate, or doesn't have one, jump out early
|
|
||||||
final SAMRecord read = p.getRead();
|
|
||||||
final int mateStart = read.getMateAlignmentStart();
|
|
||||||
if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) {
|
|
||||||
oneReadPile.add(p);
|
|
||||||
} else {
|
|
||||||
final String readName = p.getRead().getReadName();
|
|
||||||
final PileupElement pe1 = nameMap.get(readName);
|
|
||||||
if ( pe1 != null ) {
|
|
||||||
// assumes we have at most 2 reads per fragment
|
|
||||||
twoReadPile.add(new TwoReadPileupElement(pe1, p));
|
|
||||||
nameMap.remove(readName);
|
|
||||||
} else {
|
|
||||||
nameMap.put(readName, p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
oneReadPile.addAll(nameMap.values());
|
|
||||||
}
|
|
||||||
|
|
||||||
private final void skipNonOverlapping(final ReadBackedPileup pileup) {
|
|
||||||
Map<String, PileupElement> nameMap = null;
|
|
||||||
|
|
||||||
// build an initial map, grabbing all of the multi-read fragments
|
|
||||||
for ( final PileupElement p : pileup ) {
|
|
||||||
// if we know that this read won't overlap its mate, or doesn't have one, jump out early
|
|
||||||
final SAMRecord read = p.getRead();
|
|
||||||
final int mateStart = read.getMateAlignmentStart();
|
|
||||||
if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) {
|
|
||||||
if ( oneReadPile == null ) oneReadPile = new ArrayList<PileupElement>(pileup.size());
|
|
||||||
oneReadPile.add(p);
|
|
||||||
} else {
|
|
||||||
final String readName = p.getRead().getReadName();
|
|
||||||
final PileupElement pe1 = nameMap == null ? null : nameMap.get(readName);
|
|
||||||
if ( pe1 != null ) {
|
|
||||||
// assumes we have at most 2 reads per fragment
|
|
||||||
if ( twoReadPile == null ) twoReadPile = new ArrayList<TwoReadPileupElement>();
|
|
||||||
twoReadPile.add(new TwoReadPileupElement(pe1, p));
|
|
||||||
nameMap.remove(readName);
|
|
||||||
} else {
|
|
||||||
nameMap = addToNameMap(nameMap, p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( oneReadPile == null )
|
|
||||||
oneReadPile = nameMap == null ? Collections.<PileupElement>emptyList() : nameMap.values();
|
|
||||||
else if ( nameMap != null )
|
|
||||||
oneReadPile.addAll(nameMap.values());
|
|
||||||
}
|
|
||||||
|
|
||||||
private final Map<String, PileupElement> addToNameMap(Map<String, PileupElement> map, final PileupElement p) {
|
|
||||||
if ( map == null ) map = new HashMap<String, PileupElement>();
|
|
||||||
map.put(p.getRead().getReadName(), p);
|
|
||||||
return map;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final PileupElement getFromNameMap(Map<String, PileupElement> map, final PileupElement p) {
|
|
||||||
return map == null ? null : map.get(p.getRead().getReadName());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private final void addToOnePile(final PileupElement p) {
|
|
||||||
if ( oneReadPile == null ) oneReadPile = new ArrayList<PileupElement>();
|
|
||||||
oneReadPile.add(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
private final void addToTwoPile(final PileupElement p1, final PileupElement p2) {
|
|
||||||
// assumes we have at most 2 reads per fragment
|
|
||||||
if ( twoReadPile == null ) twoReadPile = new ArrayList<TwoReadPileupElement>();
|
|
||||||
twoReadPile.add(new TwoReadPileupElement(p1, p2));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the pileup elements containing two reads, in no particular order
|
* Gets the pileup elements containing two reads, in no particular order
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,15 @@ package org.broadinstitute.sting.utils.sam;
|
||||||
|
|
||||||
import net.sf.samtools.*;
|
import net.sf.samtools.*;
|
||||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
|
|
@ -29,7 +33,7 @@ public class ArtificialSAMUtils {
|
||||||
File outFile = new File(filename);
|
File outFile = new File(filename);
|
||||||
|
|
||||||
SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(header, true, outFile);
|
SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(header, true, outFile);
|
||||||
|
|
||||||
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
|
for (int x = startingChromosome; x < startingChromosome + numberOfChromosomes; x++) {
|
||||||
for (int readNumber = 1; readNumber < readsPerChomosome; readNumber++) {
|
for (int readNumber = 1; readNumber < readsPerChomosome; readNumber++) {
|
||||||
out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, DEFAULT_READ_LENGTH));
|
out.addAlignment(createArtificialRead(header, "Read_" + readNumber, x - startingChromosome, readNumber, DEFAULT_READ_LENGTH));
|
||||||
|
|
@ -145,7 +149,7 @@ public class ArtificialSAMUtils {
|
||||||
*/
|
*/
|
||||||
public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) {
|
public static GATKSAMRecord createArtificialRead(SAMFileHeader header, String name, int refIndex, int alignmentStart, int length) {
|
||||||
if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) ||
|
if( (refIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart != SAMRecord.NO_ALIGNMENT_START) ||
|
||||||
(refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) )
|
(refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && alignmentStart == SAMRecord.NO_ALIGNMENT_START) )
|
||||||
throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart);
|
throw new ReviewedStingException("Invalid alignment start for artificial read, start = " + alignmentStart);
|
||||||
GATKSAMRecord record = new GATKSAMRecord(header);
|
GATKSAMRecord record = new GATKSAMRecord(header);
|
||||||
record.setReadName(name);
|
record.setReadName(name);
|
||||||
|
|
@ -197,6 +201,37 @@ public class ArtificialSAMUtils {
|
||||||
return rec;
|
return rec;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public final static List<SAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
||||||
|
SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
|
||||||
|
SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
|
||||||
|
|
||||||
|
left.setReadPairedFlag(true);
|
||||||
|
right.setReadPairedFlag(true);
|
||||||
|
|
||||||
|
left.setProperPairFlag(true);
|
||||||
|
right.setProperPairFlag(true);
|
||||||
|
|
||||||
|
left.setFirstOfPairFlag(leftIsFirst);
|
||||||
|
right.setFirstOfPairFlag(! leftIsFirst);
|
||||||
|
|
||||||
|
left.setReadNegativeStrandFlag(leftIsNegative);
|
||||||
|
left.setMateNegativeStrandFlag(!leftIsNegative);
|
||||||
|
right.setReadNegativeStrandFlag(!leftIsNegative);
|
||||||
|
right.setMateNegativeStrandFlag(leftIsNegative);
|
||||||
|
|
||||||
|
left.setMateAlignmentStart(right.getAlignmentStart());
|
||||||
|
right.setMateAlignmentStart(left.getAlignmentStart());
|
||||||
|
|
||||||
|
left.setMateReferenceIndex(0);
|
||||||
|
right.setMateReferenceIndex(0);
|
||||||
|
|
||||||
|
int isize = rightStart + readLen - leftStart;
|
||||||
|
left.setInferredInsertSize(isize);
|
||||||
|
right.setInferredInsertSize(-isize);
|
||||||
|
|
||||||
|
return Arrays.asList(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create an iterator containing the specified read piles
|
* create an iterator containing the specified read piles
|
||||||
*
|
*
|
||||||
|
|
@ -258,4 +293,52 @@ public class ArtificialSAMUtils {
|
||||||
|
|
||||||
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
|
return new ArtificialSAMQueryIterator(startingChr, endingChr, readCount, unmappedReadCount, header);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final static int ranIntInclusive(Random ran, int start, int stop) {
|
||||||
|
final int range = stop - start;
|
||||||
|
return ran.nextInt(range) + start;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a read backed pileup containing up to pileupSize reads at refID 0 from header at loc with
|
||||||
|
* reads created that have readLen bases. Pairs are sampled from a gaussian distribution with mean insert
|
||||||
|
* size of insertSize and variation of insertSize / 10. The first read will be in the pileup, and the second
|
||||||
|
* may be, depending on where this sampled insertSize puts it.
|
||||||
|
* @param header
|
||||||
|
* @param loc
|
||||||
|
* @param readLen
|
||||||
|
* @param insertSize
|
||||||
|
* @param pileupSize
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static ReadBackedPileup createReadBackedPileup(final SAMFileHeader header, final GenomeLoc loc, final int readLen, final int insertSize, final int pileupSize) {
|
||||||
|
final Random ran = new Random();
|
||||||
|
final boolean leftIsFirst = true;
|
||||||
|
final boolean leftIsNegative = false;
|
||||||
|
final int insertSizeVariation = insertSize / 10;
|
||||||
|
final int pos = loc.getStart();
|
||||||
|
|
||||||
|
final List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||||
|
for ( int i = 0; i < pileupSize / 2; i++ ) {
|
||||||
|
final String readName = "read" + i;
|
||||||
|
final int leftStart = ranIntInclusive(ran, 1, pos);
|
||||||
|
final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize);
|
||||||
|
final int rightStart = leftStart + fragmentSize - readLen;
|
||||||
|
|
||||||
|
if ( rightStart <= 0 ) continue;
|
||||||
|
|
||||||
|
List<SAMRecord> pair = createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
||||||
|
final SAMRecord left = pair.get(0);
|
||||||
|
final SAMRecord right = pair.get(1);
|
||||||
|
|
||||||
|
pileupElements.add(new PileupElement(left, pos - leftStart));
|
||||||
|
|
||||||
|
if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) {
|
||||||
|
pileupElements.add(new PileupElement(right, pos - rightStart));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Collections.sort(pileupElements);
|
||||||
|
return new ReadBackedPileupImpl(loc, pileupElements);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -214,54 +214,6 @@ public class ReadUtils {
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* s1 e1
|
|
||||||
* |-----------------------> [record in hand]
|
|
||||||
* s2
|
|
||||||
* <-----------------------|
|
|
||||||
*
|
|
||||||
* s1, e1, and s2 are all in the record. Assuming that s1 < s2 (we are the left most read),
|
|
||||||
* we can compute whether we overlap with our mate by seeing if s2 <= e1 or no. If e1 <
|
|
||||||
* s2 then we know that we cannot overlap.
|
|
||||||
*
|
|
||||||
* If we are looking at the right read
|
|
||||||
*
|
|
||||||
* s1
|
|
||||||
* |----------------------->
|
|
||||||
* s2 e2
|
|
||||||
* <-----------------------| [record in hand]
|
|
||||||
*
|
|
||||||
* we know the position of s1 and s2, but we don't know e1, so we cannot tell if we
|
|
||||||
* overlap with our mate or not, so in this case we return MAYBE.
|
|
||||||
*
|
|
||||||
* Note that if rec has an unmapped mate or is unpaired we certainly know the answer
|
|
||||||
*
|
|
||||||
* @param rec
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public static ReadOverlapsMateType readMightOverlapMate(final SAMRecord rec) {
|
|
||||||
if ( ! rec.getReadPairedFlag() || rec.getMateUnmappedFlag() ) {
|
|
||||||
return ReadOverlapsMateType.NO;
|
|
||||||
} else { // read is actually paired
|
|
||||||
final int recStart = rec.getAlignmentStart();
|
|
||||||
final int recEnd = rec.getAlignmentEnd();
|
|
||||||
final int mateStart = rec.getMateAlignmentStart();
|
|
||||||
|
|
||||||
if ( recStart < mateStart ) {
|
|
||||||
// we are the left most read
|
|
||||||
return mateStart <= recEnd ? ReadOverlapsMateType.LEFT_YES: ReadOverlapsMateType.NO;
|
|
||||||
} else if ( recStart == mateStart ) {
|
|
||||||
// we are the left most read
|
|
||||||
return ReadOverlapsMateType.SAME_START;
|
|
||||||
} else {
|
|
||||||
// we are the right most read, so we cannot tell
|
|
||||||
return ReadOverlapsMateType.RIGHT_MAYBE;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public enum ReadOverlapsMateType { LEFT_YES, NO, SAME_START, RIGHT_MAYBE }
|
|
||||||
|
|
||||||
private static Pair<Integer, Integer> getAdaptorBoundaries(SAMRecord rec, int adaptorLength) {
|
private static Pair<Integer, Integer> getAdaptorBoundaries(SAMRecord rec, int adaptorLength) {
|
||||||
int isize = rec.getInferredInsertSize();
|
int isize = rec.getInferredInsertSize();
|
||||||
if ( isize == 0 )
|
if ( isize == 0 )
|
||||||
|
|
|
||||||
|
|
@ -53,38 +53,12 @@ public class FragmentPileupBenchmark extends SimpleBenchmark {
|
||||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||||
GenomeLocParser genomeLocParser;
|
GenomeLocParser genomeLocParser;
|
||||||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||||
final int pos = 50;
|
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 50);
|
||||||
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", pos);
|
|
||||||
|
|
||||||
final Random ran = new Random();
|
|
||||||
final int readLen = 100;
|
final int readLen = 100;
|
||||||
final boolean leftIsFirst = true;
|
|
||||||
final boolean leftIsNegative = false;
|
|
||||||
final int insertSizeVariation = insertSize / 10;
|
|
||||||
|
|
||||||
for ( int pileupN = 0; pileupN < nPileupsToGenerate; pileupN++ ) {
|
for ( int pileupN = 0; pileupN < nPileupsToGenerate; pileupN++ ) {
|
||||||
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
ReadBackedPileup rbp = ArtificialSAMUtils.createReadBackedPileup(header, loc, readLen, insertSize, pileupSize);
|
||||||
for ( int i = 0; i < pileupSize / 2; i++ ) {
|
pileups.add(rbp);
|
||||||
final String readName = "read" + i;
|
|
||||||
final int leftStart = new Random().nextInt(49) + 1;
|
|
||||||
final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize);
|
|
||||||
final int rightStart = leftStart + fragmentSize - readLen;
|
|
||||||
|
|
||||||
if ( rightStart <= 0 ) continue;
|
|
||||||
|
|
||||||
List<SAMRecord> pair = FragmentPileupUnitTest.createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
|
||||||
SAMRecord left = pair.get(0);
|
|
||||||
SAMRecord right = pair.get(1);
|
|
||||||
|
|
||||||
pileupElements.add(new PileupElement(left, pos - leftStart));
|
|
||||||
|
|
||||||
if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) {
|
|
||||||
pileupElements.add(new PileupElement(right, pos - rightStart));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Collections.sort(pileupElements);
|
|
||||||
pileups.add(new ReadBackedPileupImpl(loc, pileupElements));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -100,18 +74,10 @@ public class FragmentPileupBenchmark extends SimpleBenchmark {
|
||||||
run(rep, FragmentPileup.FragmentMatchingAlgorithm.ORIGINAL);
|
run(rep, FragmentPileup.FragmentMatchingAlgorithm.ORIGINAL);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void timeFullOverlapPotential(int rep) {
|
|
||||||
run(rep, FragmentPileup.FragmentMatchingAlgorithm.FAST_V1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void timeSkipNonOverlapping(int rep) {
|
public void timeSkipNonOverlapping(int rep) {
|
||||||
run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlapping);
|
run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlapping);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void timeSkipNonOverlappingNotLazy(int rep) {
|
|
||||||
run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlappingNotLazy);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
CaliperMain.main(FragmentPileupBenchmark.class, args);
|
CaliperMain.main(FragmentPileupBenchmark.class, args);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -43,44 +43,13 @@ import java.util.*;
|
||||||
public class FragmentPileupUnitTest extends BaseTest {
|
public class FragmentPileupUnitTest extends BaseTest {
|
||||||
private static SAMFileHeader header;
|
private static SAMFileHeader header;
|
||||||
|
|
||||||
public final static List<SAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
|
||||||
SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
|
|
||||||
SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
|
|
||||||
|
|
||||||
left.setReadPairedFlag(true);
|
|
||||||
right.setReadPairedFlag(true);
|
|
||||||
|
|
||||||
left.setProperPairFlag(true);
|
|
||||||
right.setProperPairFlag(true);
|
|
||||||
|
|
||||||
left.setFirstOfPairFlag(leftIsFirst);
|
|
||||||
right.setFirstOfPairFlag(! leftIsFirst);
|
|
||||||
|
|
||||||
left.setReadNegativeStrandFlag(leftIsNegative);
|
|
||||||
left.setMateNegativeStrandFlag(!leftIsNegative);
|
|
||||||
right.setReadNegativeStrandFlag(!leftIsNegative);
|
|
||||||
right.setMateNegativeStrandFlag(leftIsNegative);
|
|
||||||
|
|
||||||
left.setMateAlignmentStart(right.getAlignmentStart());
|
|
||||||
right.setMateAlignmentStart(left.getAlignmentStart());
|
|
||||||
|
|
||||||
left.setMateReferenceIndex(0);
|
|
||||||
right.setMateReferenceIndex(0);
|
|
||||||
|
|
||||||
int isize = rightStart + readLen - leftStart;
|
|
||||||
left.setInferredInsertSize(isize);
|
|
||||||
right.setInferredInsertSize(-isize);
|
|
||||||
|
|
||||||
return Arrays.asList(left, right);
|
|
||||||
}
|
|
||||||
|
|
||||||
private class FragmentPileupTest extends TestDataProvider {
|
private class FragmentPileupTest extends TestDataProvider {
|
||||||
List<TestState> states = new ArrayList<TestState>();
|
List<TestState> states = new ArrayList<TestState>();
|
||||||
|
|
||||||
private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
||||||
super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative));
|
super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative));
|
||||||
|
|
||||||
List<SAMRecord> pair = createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
List<SAMRecord> pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
||||||
SAMRecord left = pair.get(0);
|
SAMRecord left = pair.get(0);
|
||||||
SAMRecord right = pair.get(1);
|
SAMRecord right = pair.get(1);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue