Initial version of "fast" FragmentPileup code
-- Uses mayOverlapRoutine in ReadUtils -- Attempts to be smart when doing overlap calculation, to avoid unnecessary allocations -- PileupElement now comparable (sorts on offset than on start) -- Caliper microbenchmark to assess performance
This commit is contained in:
parent
b863390cb1
commit
42bf9adede
|
|
@ -1,9 +1,10 @@
|
||||||
package org.broadinstitute.sting.utils.pileup;
|
package org.broadinstitute.sting.utils.pileup;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import net.sf.samtools.SAMRecord;
|
||||||
import java.util.Collection;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import java.util.HashMap;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import java.util.Map;
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An easy to access fragment-based pileup, which contains two separate pileups. The first
|
* An easy to access fragment-based pileup, which contains two separate pileups. The first
|
||||||
|
|
@ -16,28 +17,46 @@ import java.util.Map;
|
||||||
* TODO -- technically we could generalize this code to support a pseudo-duplicate marking
|
* TODO -- technically we could generalize this code to support a pseudo-duplicate marking
|
||||||
* TODO -- algorithm that could collect all duplicates into a single super pileup element
|
* TODO -- algorithm that could collect all duplicates into a single super pileup element
|
||||||
*
|
*
|
||||||
|
* Oct 21: note that the order of the oneReadPileup and twoReadPileups are not
|
||||||
|
* defined. The algorithms that produce these lists are in fact producing
|
||||||
|
* lists of Pileup elements *NOT* sorted by alignment start position of the underlying
|
||||||
|
* reads.
|
||||||
|
*
|
||||||
* User: depristo
|
* User: depristo
|
||||||
* Date: 3/26/11
|
* Date: 3/26/11
|
||||||
* Time: 10:09 PM
|
* Time: 10:09 PM
|
||||||
*/
|
*/
|
||||||
public class FragmentPileup {
|
public class FragmentPileup {
|
||||||
final Collection<PileupElement> oneReadPile;
|
Collection<PileupElement> oneReadPile = null;
|
||||||
final Collection<TwoReadPileupElement> twoReadPile = new ArrayList<TwoReadPileupElement>();
|
Collection<TwoReadPileupElement> twoReadPile = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new Fragment-based pileup from the standard read-based pileup
|
* Create a new Fragment-based pileup from the standard read-based pileup
|
||||||
* @param pileup
|
* @param pileup
|
||||||
*/
|
*/
|
||||||
public FragmentPileup(ReadBackedPileup pileup) {
|
public FragmentPileup(ReadBackedPileup pileup) {
|
||||||
Map<String, PileupElement> nameMap = new HashMap<String, PileupElement>();
|
//oldSlowCalculation(pileup);
|
||||||
|
fastNewCalculation(pileup);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected FragmentPileup(ReadBackedPileup pileup, boolean useOldAlgorithm) {
|
||||||
|
if ( useOldAlgorithm )
|
||||||
|
oldSlowCalculation(pileup);
|
||||||
|
else
|
||||||
|
fastNewCalculation(pileup);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void oldSlowCalculation(final ReadBackedPileup pileup) {
|
||||||
|
final Map<String, PileupElement> nameMap = new HashMap<String, PileupElement>();
|
||||||
|
|
||||||
// build an initial map, grabbing all of the multi-read fragments
|
// build an initial map, grabbing all of the multi-read fragments
|
||||||
for ( PileupElement p : pileup ) {
|
for ( final PileupElement p : pileup ) {
|
||||||
String readName = p.getRead().getReadName();
|
final String readName = p.getRead().getReadName();
|
||||||
|
|
||||||
PileupElement pe1 = nameMap.get(readName);
|
final PileupElement pe1 = nameMap.get(readName);
|
||||||
if ( pe1 != null ) {
|
if ( pe1 != null ) {
|
||||||
// assumes we have at most 2 reads per fragment
|
// assumes we have at most 2 reads per fragment
|
||||||
|
if ( twoReadPile == null ) twoReadPile = new ArrayList<TwoReadPileupElement>();
|
||||||
twoReadPile.add(new TwoReadPileupElement(pe1, p));
|
twoReadPile.add(new TwoReadPileupElement(pe1, p));
|
||||||
nameMap.remove(readName);
|
nameMap.remove(readName);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -45,17 +64,96 @@ public class FragmentPileup {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// now set the one Read pile to the values in the nameMap with only a single read
|
|
||||||
oneReadPile = nameMap.values();
|
oneReadPile = nameMap.values();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param pileup
|
||||||
|
*/
|
||||||
|
private final void fastNewCalculation(final ReadBackedPileup pileup) {
|
||||||
|
Map<String, PileupElement> nameMap = null; // lazy initialization
|
||||||
|
|
||||||
|
for ( final PileupElement p : pileup ) {
|
||||||
|
final SAMRecord read = p.getRead();
|
||||||
|
|
||||||
|
switch (ReadUtils.readMightOverlapMate(read) ) {
|
||||||
|
// we know for certain this read doesn't have an overlapping mate
|
||||||
|
case NO: {
|
||||||
|
addToOnePile(p);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we know that we overlap our mate, so put the read in the nameMap in
|
||||||
|
// case our mate shows up
|
||||||
|
case LEFT_YES: {
|
||||||
|
nameMap = addToNameMap(nameMap, p);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// read starts at the same position, so we are looking at either the first or
|
||||||
|
// the second read. In the first, add it to the map, in the second grab it
|
||||||
|
// from the map and create a fragment
|
||||||
|
case SAME_START: {
|
||||||
|
final PileupElement pe1 = getFromNameMap(nameMap, p);
|
||||||
|
if ( pe1 != null ) {
|
||||||
|
addToTwoPile(pe1, p);
|
||||||
|
nameMap.remove(p.getRead().getReadName());
|
||||||
|
} else {
|
||||||
|
nameMap = addToNameMap(nameMap, p);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// in this case we need to see if our mate is already present, and if so
|
||||||
|
// grab the read from the list
|
||||||
|
case RIGHT_MAYBE: {
|
||||||
|
final PileupElement pe1 = getFromNameMap(nameMap, p);
|
||||||
|
if ( pe1 != null ) {
|
||||||
|
addToTwoPile(pe1, p);
|
||||||
|
nameMap.remove(p.getRead().getReadName());
|
||||||
|
} else {
|
||||||
|
addToOnePile(p);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( nameMap != null && ! nameMap.isEmpty() ) // could be slightly more optimally
|
||||||
|
for ( final PileupElement p : nameMap.values() )
|
||||||
|
addToOnePile(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final Map<String, PileupElement> addToNameMap(Map<String, PileupElement> map, final PileupElement p) {
|
||||||
|
if ( map == null ) map = new HashMap<String, PileupElement>();
|
||||||
|
map.put(p.getRead().getReadName(), p);
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final PileupElement getFromNameMap(Map<String, PileupElement> map, final PileupElement p) {
|
||||||
|
return map == null ? null : map.get(p.getRead().getReadName());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private final void addToOnePile(final PileupElement p) {
|
||||||
|
if ( oneReadPile == null ) oneReadPile = new ArrayList<PileupElement>();
|
||||||
|
oneReadPile.add(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void addToTwoPile(final PileupElement p1, final PileupElement p2) {
|
||||||
|
// assumes we have at most 2 reads per fragment
|
||||||
|
if ( twoReadPile == null ) twoReadPile = new ArrayList<TwoReadPileupElement>();
|
||||||
|
twoReadPile.add(new TwoReadPileupElement(p1, p2));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the pileup elements containing two reads, in no particular order
|
* Gets the pileup elements containing two reads, in no particular order
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public Collection<TwoReadPileupElement> getTwoReadPileup() {
|
public Collection<TwoReadPileupElement> getTwoReadPileup() {
|
||||||
return twoReadPile;
|
return twoReadPile == null ? Collections.<TwoReadPileupElement>emptyList() : twoReadPile;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -64,7 +162,7 @@ public class FragmentPileup {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public Collection<PileupElement> getOneReadPileup() {
|
public Collection<PileupElement> getOneReadPileup() {
|
||||||
return oneReadPile;
|
return oneReadPile == null ? Collections.<PileupElement>emptyList() : oneReadPile;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
* Date: Apr 14, 2009
|
* Date: Apr 14, 2009
|
||||||
* Time: 8:54:05 AM
|
* Time: 8:54:05 AM
|
||||||
*/
|
*/
|
||||||
public class PileupElement {
|
public class PileupElement implements Comparable<PileupElement> {
|
||||||
public static final byte DELETION_BASE = BaseUtils.D;
|
public static final byte DELETION_BASE = BaseUtils.D;
|
||||||
public static final byte DELETION_QUAL = (byte) 16;
|
public static final byte DELETION_QUAL = (byte) 16;
|
||||||
public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87;
|
public static final byte A_FOLLOWED_BY_INSERTION_BASE = (byte) 87;
|
||||||
|
|
@ -76,6 +76,20 @@ public class PileupElement {
|
||||||
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
|
return isDeletion() ? DELETION_QUAL : read.getBaseQualities()[offset];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(final PileupElement pileupElement) {
|
||||||
|
if ( offset < pileupElement.offset )
|
||||||
|
return -1;
|
||||||
|
else if ( offset > pileupElement.offset )
|
||||||
|
return 1;
|
||||||
|
else if ( read.getAlignmentStart() < pileupElement.read.getAlignmentStart() )
|
||||||
|
return -1;
|
||||||
|
else if ( read.getAlignmentStart() > pileupElement.read.getAlignmentStart() )
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------
|
// --------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Reduced read accessors
|
// Reduced read accessors
|
||||||
|
|
|
||||||
|
|
@ -214,6 +214,54 @@ public class ReadUtils {
|
||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* s1 e1
|
||||||
|
* |-----------------------> [record in hand]
|
||||||
|
* s2
|
||||||
|
* <-----------------------|
|
||||||
|
*
|
||||||
|
* s1, e1, and s2 are all in the record. Assuming that s1 < s2 (we are the left most read),
|
||||||
|
* we can compute whether we overlap with our mate by seeing if s2 <= e1 or no. If e1 <
|
||||||
|
* s2 then we known that we cannot over.
|
||||||
|
*
|
||||||
|
* If we are looking at the right read
|
||||||
|
*
|
||||||
|
* s1
|
||||||
|
* |----------------------->
|
||||||
|
* s2 e2
|
||||||
|
* <-----------------------| [record in hand]
|
||||||
|
*
|
||||||
|
* we know the position of s1 and s2, but we don't know e1, so we cannot tell if we
|
||||||
|
* overlap with our mate or not, so in this case we return MAYBE.
|
||||||
|
*
|
||||||
|
* Note that if rec has an unmapped mate or is unpaired we certainly know the answer
|
||||||
|
*
|
||||||
|
* @param rec
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public static ReadOverlapsMateType readMightOverlapMate(final SAMRecord rec) {
|
||||||
|
if ( ! rec.getReadPairedFlag() || rec.getMateUnmappedFlag() ) {
|
||||||
|
return ReadOverlapsMateType.NO;
|
||||||
|
} else { // read is actually paired
|
||||||
|
final int recStart = rec.getAlignmentStart();
|
||||||
|
final int recEnd = rec.getAlignmentEnd();
|
||||||
|
final int mateStart = rec.getMateAlignmentStart();
|
||||||
|
|
||||||
|
if ( recStart < mateStart ) {
|
||||||
|
// we are the left most read
|
||||||
|
return mateStart <= recEnd ? ReadOverlapsMateType.LEFT_YES: ReadOverlapsMateType.NO;
|
||||||
|
} else if ( recStart == mateStart ) {
|
||||||
|
// we are the left most read
|
||||||
|
return ReadOverlapsMateType.SAME_START;
|
||||||
|
} else {
|
||||||
|
// we are the right most read, so we cannot tell
|
||||||
|
return ReadOverlapsMateType.RIGHT_MAYBE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public enum ReadOverlapsMateType { LEFT_YES, NO, SAME_START, RIGHT_MAYBE }
|
||||||
|
|
||||||
private static Pair<Integer, Integer> getAdaptorBoundaries(SAMRecord rec, int adaptorLength) {
|
private static Pair<Integer, Integer> getAdaptorBoundaries(SAMRecord rec, int adaptorLength) {
|
||||||
int isize = rec.getInferredInsertSize();
|
int isize = rec.getInferredInsertSize();
|
||||||
if ( isize == 0 )
|
if ( isize == 0 )
|
||||||
|
|
|
||||||
|
|
@ -224,7 +224,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -L 1:10,000,000-10,500,000",
|
" -L 1:10,000,000-10,500,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("0bece77ce6bc447438ef9b2921b2dc41"));
|
Arrays.asList("eeba568272f9b42d5450da75c7cc6d2d"));
|
||||||
|
|
||||||
executeTest(String.format("test indel caller in SLX"), spec);
|
executeTest(String.format("test indel caller in SLX"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -252,7 +252,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
" -o %s" +
|
" -o %s" +
|
||||||
" -L 1:10,000,000-10,500,000",
|
" -L 1:10,000,000-10,500,000",
|
||||||
1,
|
1,
|
||||||
Arrays.asList("790b1a1d6ab79eee8c24812bb8ca6fae"));
|
Arrays.asList("19ff9bd3139480bdf79dcbf117cf2b24"));
|
||||||
|
|
||||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||||
}
|
}
|
||||||
|
|
@ -262,7 +262,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("408d3aba4d094c067fc00a43992c2292"));
|
Arrays.asList("118918f2e9e56a3cfc5ccb2856d529c8"));
|
||||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -272,7 +272,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||||
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
+ validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||||
Arrays.asList("5e4e09354410b76fc0d822050d84132a"));
|
Arrays.asList("a20799237accd52c1b8c2ac096309c8f"));
|
||||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -282,7 +282,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1,
|
||||||
Arrays.asList("c599eedbeb422713b8a28529e805e4ae"));
|
Arrays.asList("18ef8181157b4ac3eb8492f538467f92"));
|
||||||
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
executeTest("test MultiSample Pilot2 indels with complicated records", spec3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -291,7 +291,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
||||||
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
|
||||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
|
||||||
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
|
||||||
Arrays.asList("37d908a682ac269f8f19dec939ff5b01"));
|
Arrays.asList("ad884e511a751b05e64db5314314365a"));
|
||||||
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
|
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,108 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.pileup;
|
||||||
|
|
||||||
|
import com.google.caliper.Param;
|
||||||
|
import com.google.caliper.SimpleBenchmark;
|
||||||
|
import com.google.caliper.runner.CaliperMain;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Caliper microbenchmark of fragment pileup
|
||||||
|
*/
|
||||||
|
public class FragmentPileupBenchmark extends SimpleBenchmark {
|
||||||
|
final int N_PILEUPS_TO_GENERATE = 100;
|
||||||
|
List<ReadBackedPileup> pileups = new ArrayList<ReadBackedPileup>(N_PILEUPS_TO_GENERATE);
|
||||||
|
|
||||||
|
@Param({"10", "100", "1000"}) // , "10000"})
|
||||||
|
int pileupSize; // set automatically by framework
|
||||||
|
|
||||||
|
@Param({"150", "400"})
|
||||||
|
int insertSize; // set automatically by framework
|
||||||
|
|
||||||
|
@Override protected void setUp() {
|
||||||
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
|
||||||
|
GenomeLocParser genomeLocParser;
|
||||||
|
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||||
|
final int pos = 50;
|
||||||
|
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", pos);
|
||||||
|
|
||||||
|
final Random ran = new Random();
|
||||||
|
final int readLen = 100;
|
||||||
|
final boolean leftIsFirst = true;
|
||||||
|
final boolean leftIsNegative = false;
|
||||||
|
final int insertSizeVariation = insertSize / 10;
|
||||||
|
|
||||||
|
for ( int pileupN = 0; pileupN < N_PILEUPS_TO_GENERATE; pileupN++ ) {
|
||||||
|
List<PileupElement> pileupElements = new ArrayList<PileupElement>();
|
||||||
|
for ( int i = 0; i < pileupSize / 2; i++ ) {
|
||||||
|
final String readName = "read" + i;
|
||||||
|
final int leftStart = new Random().nextInt(49) + 1;
|
||||||
|
final int fragmentSize = (int)(ran.nextGaussian() * insertSizeVariation + insertSize);
|
||||||
|
final int rightStart = leftStart + fragmentSize - readLen;
|
||||||
|
|
||||||
|
if ( rightStart <= 0 ) continue;
|
||||||
|
|
||||||
|
List<SAMRecord> pair = FragmentPileupUnitTest.createPair(header, readName, readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
||||||
|
SAMRecord left = pair.get(0);
|
||||||
|
SAMRecord right = pair.get(1);
|
||||||
|
|
||||||
|
pileupElements.add(new PileupElement(left, pos - leftStart));
|
||||||
|
|
||||||
|
if ( pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd() ) {
|
||||||
|
pileupElements.add(new PileupElement(right, pos - rightStart));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Collections.sort(pileupElements);
|
||||||
|
pileups.add(new ReadBackedPileupImpl(loc, pileupElements));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void timeNaiveNameMatch(int rep) {
|
||||||
|
int nFrags = 0;
|
||||||
|
for ( int i = 0; i < rep; i++ ) {
|
||||||
|
for ( ReadBackedPileup rbp : pileups )
|
||||||
|
nFrags += new FragmentPileup(rbp, true).getTwoReadPileup().size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void timeFastNameMatch(int rep) {
|
||||||
|
int nFrags = 0;
|
||||||
|
for ( int i = 0; i < rep; i++ )
|
||||||
|
for ( ReadBackedPileup rbp : pileups )
|
||||||
|
nFrags += new FragmentPileup(rbp, false).getTwoReadPileup().size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
CaliperMain.main(FragmentPileupBenchmark.class, args);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -43,37 +43,48 @@ import java.util.*;
|
||||||
public class FragmentPileupUnitTest extends BaseTest {
|
public class FragmentPileupUnitTest extends BaseTest {
|
||||||
private static SAMFileHeader header;
|
private static SAMFileHeader header;
|
||||||
|
|
||||||
|
public final static List<SAMRecord> createPair(SAMFileHeader header, String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
||||||
|
SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, name, 0, leftStart, readLen);
|
||||||
|
SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, name, 0, rightStart, readLen);
|
||||||
|
|
||||||
|
left.setReadPairedFlag(true);
|
||||||
|
right.setReadPairedFlag(true);
|
||||||
|
|
||||||
|
left.setProperPairFlag(true);
|
||||||
|
right.setProperPairFlag(true);
|
||||||
|
|
||||||
|
left.setFirstOfPairFlag(leftIsFirst);
|
||||||
|
right.setFirstOfPairFlag(! leftIsFirst);
|
||||||
|
|
||||||
|
left.setReadNegativeStrandFlag(leftIsNegative);
|
||||||
|
left.setMateNegativeStrandFlag(!leftIsNegative);
|
||||||
|
right.setReadNegativeStrandFlag(!leftIsNegative);
|
||||||
|
right.setMateNegativeStrandFlag(leftIsNegative);
|
||||||
|
|
||||||
|
left.setMateAlignmentStart(right.getAlignmentStart());
|
||||||
|
right.setMateAlignmentStart(left.getAlignmentStart());
|
||||||
|
|
||||||
|
left.setMateReferenceIndex(0);
|
||||||
|
right.setMateReferenceIndex(0);
|
||||||
|
|
||||||
|
int isize = rightStart + readLen - leftStart;
|
||||||
|
left.setInferredInsertSize(isize);
|
||||||
|
right.setInferredInsertSize(-isize);
|
||||||
|
|
||||||
|
return Arrays.asList(left, right);
|
||||||
|
}
|
||||||
|
|
||||||
private class FragmentPileupTest extends TestDataProvider {
|
private class FragmentPileupTest extends TestDataProvider {
|
||||||
List<TestState> states = new ArrayList<TestState>();
|
List<TestState> states = new ArrayList<TestState>();
|
||||||
|
|
||||||
private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) {
|
||||||
super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative));
|
super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative));
|
||||||
|
|
||||||
|
List<SAMRecord> pair = createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative);
|
||||||
|
SAMRecord left = pair.get(0);
|
||||||
|
SAMRecord right = pair.get(1);
|
||||||
|
|
||||||
for ( int pos = leftStart; pos < rightStart + readLen; pos++) {
|
for ( int pos = leftStart; pos < rightStart + readLen; pos++) {
|
||||||
SAMRecord left = ArtificialSAMUtils.createArtificialRead(header, "readpair", 0, leftStart, readLen);
|
|
||||||
SAMRecord right = ArtificialSAMUtils.createArtificialRead(header, "readpair", 0, rightStart, readLen);
|
|
||||||
|
|
||||||
left.setProperPairFlag(true);
|
|
||||||
right.setProperPairFlag(true);
|
|
||||||
|
|
||||||
left.setFirstOfPairFlag(leftIsFirst);
|
|
||||||
right.setFirstOfPairFlag(! leftIsFirst);
|
|
||||||
|
|
||||||
left.setReadNegativeStrandFlag(leftIsNegative);
|
|
||||||
left.setMateNegativeStrandFlag(!leftIsNegative);
|
|
||||||
right.setReadNegativeStrandFlag(!leftIsNegative);
|
|
||||||
right.setMateNegativeStrandFlag(leftIsNegative);
|
|
||||||
|
|
||||||
left.setMateAlignmentStart(right.getAlignmentStart());
|
|
||||||
right.setMateAlignmentStart(left.getAlignmentStart());
|
|
||||||
|
|
||||||
left.setMateReferenceIndex(0);
|
|
||||||
right.setMateReferenceIndex(0);
|
|
||||||
|
|
||||||
int isize = rightStart + readLen - leftStart;
|
|
||||||
left.setInferredInsertSize(isize);
|
|
||||||
right.setInferredInsertSize(-isize);
|
|
||||||
|
|
||||||
boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd();
|
boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd();
|
||||||
boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd();
|
boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd();
|
||||||
|
|
||||||
|
|
@ -139,7 +150,6 @@ public class FragmentPileupUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@BeforeTest
|
@BeforeTest
|
||||||
public void setup() {
|
public void setup() {
|
||||||
header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
|
header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue