diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java index f9a94989b..e811db355 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java @@ -30,6 +30,13 @@ public class FragmentPileup { Collection oneReadPile = null; Collection twoReadPile = null; + public enum FragmentMatchingAlgorithm { + ORIGINAL, + FAST_V1, + skipNonOverlapping, + skipNonOverlappingNotLazy + } + /** * Create a new Fragment-based pileup from the standard read-based pileup * @param pileup @@ -39,15 +46,17 @@ public class FragmentPileup { fastNewCalculation(pileup); } - protected FragmentPileup(ReadBackedPileup pileup, boolean useOldAlgorithm) { - if ( useOldAlgorithm ) - oldSlowCalculation(pileup); - else - fastNewCalculation(pileup); + protected FragmentPileup(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) { + switch ( algorithm ) { + case ORIGINAL: oldSlowCalculation(pileup); break; + case FAST_V1: fastNewCalculation(pileup); break; + case skipNonOverlapping: skipNonOverlapping(pileup); break; + case skipNonOverlappingNotLazy: skipNonOverlappingNotLazy(pileup); break; + } } private final void oldSlowCalculation(final ReadBackedPileup pileup) { - final Map nameMap = new HashMap(); + final Map nameMap = new HashMap(pileup.size()); // build an initial map, grabbing all of the multi-read fragments for ( final PileupElement p : pileup ) { @@ -104,7 +113,6 @@ public class FragmentPileup { break; } - // in this case we need to see if our mate is already present, and if so // grab the read from the list case RIGHT_MAYBE: { @@ -120,9 +128,74 @@ public class FragmentPileup { } } - if ( nameMap != null && ! nameMap.isEmpty() ) // could be slightly more optimally - for ( final PileupElement p : nameMap.values() ) - addToOnePile(p); + if ( nameMap != null && ! nameMap.isEmpty() ) { + if ( oneReadPile == null ) + oneReadPile = nameMap.values(); + else + oneReadPile.addAll(nameMap.values()); + } + } + + /** + * @param pileup + */ + private final void skipNonOverlappingNotLazy(final ReadBackedPileup pileup) { + oneReadPile = new ArrayList(pileup.size()); + twoReadPile = new ArrayList(); + final Map nameMap = new HashMap(pileup.size()); + + // build an initial map, grabbing all of the multi-read fragments + for ( final PileupElement p : pileup ) { + // if we know that this read won't overlap its mate, or doesn't have one, jump out early + final SAMRecord read = p.getRead(); + final int mateStart = read.getMateAlignmentStart(); + if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) { + oneReadPile.add(p); + } else { + final String readName = p.getRead().getReadName(); + final PileupElement pe1 = nameMap.get(readName); + if ( pe1 != null ) { + // assumes we have at most 2 reads per fragment + twoReadPile.add(new TwoReadPileupElement(pe1, p)); + nameMap.remove(readName); + } else { + nameMap.put(readName, p); + } + } + } + + oneReadPile.addAll(nameMap.values()); + } + + private final void skipNonOverlapping(final ReadBackedPileup pileup) { + Map nameMap = null; + + // build an initial map, grabbing all of the multi-read fragments + for ( final PileupElement p : pileup ) { + // if we know that this read won't overlap its mate, or doesn't have one, jump out early + final SAMRecord read = p.getRead(); + final int mateStart = read.getMateAlignmentStart(); + if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) { + if ( oneReadPile == null ) oneReadPile = new ArrayList(pileup.size()); + oneReadPile.add(p); + } else { + final String readName = p.getRead().getReadName(); + final PileupElement pe1 = nameMap == null ? null : nameMap.get(readName); + if ( pe1 != null ) { + // assumes we have at most 2 reads per fragment + if ( twoReadPile == null ) twoReadPile = new ArrayList(); + twoReadPile.add(new TwoReadPileupElement(pe1, p)); + nameMap.remove(readName); + } else { + nameMap = addToNameMap(nameMap, p); + } + } + } + + if ( oneReadPile == null ) + oneReadPile = nameMap == null ? Collections.emptyList() : nameMap.values(); + else if ( nameMap != null ) + oneReadPile.addAll(nameMap.values()); } private final Map addToNameMap(Map map, final PileupElement p) { diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java index fe1ca305f..6a5378d46 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java @@ -39,16 +39,17 @@ import java.util.*; * Caliper microbenchmark of fragment pileup */ public class FragmentPileupBenchmark extends SimpleBenchmark { - final int N_PILEUPS_TO_GENERATE = 100; - List pileups = new ArrayList(N_PILEUPS_TO_GENERATE); + List pileups; - @Param({"10", "100", "1000"}) // , "10000"}) + @Param({"0", "4", "30", "150", "1000"}) int pileupSize; // set automatically by framework - @Param({"150", "400"}) + @Param({"200", "400"}) int insertSize; // set automatically by framework @Override protected void setUp() { + final int nPileupsToGenerate = 100; + pileups = new ArrayList(nPileupsToGenerate); SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); GenomeLocParser genomeLocParser; genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); @@ -61,7 +62,7 @@ public class FragmentPileupBenchmark extends SimpleBenchmark { final boolean leftIsNegative = false; final int insertSizeVariation = insertSize / 10; - for ( int pileupN = 0; pileupN < N_PILEUPS_TO_GENERATE; pileupN++ ) { + for ( int pileupN = 0; pileupN < nPileupsToGenerate; pileupN++ ) { List pileupElements = new ArrayList(); for ( int i = 0; i < pileupSize / 2; i++ ) { final String readName = "read" + i; @@ -87,19 +88,28 @@ public class FragmentPileupBenchmark extends SimpleBenchmark { } } - public void timeNaiveNameMatch(int rep) { + private void run(int rep, FragmentPileup.FragmentMatchingAlgorithm algorithm) { int nFrags = 0; for ( int i = 0; i < rep; i++ ) { for ( ReadBackedPileup rbp : pileups ) - nFrags += new FragmentPileup(rbp, true).getTwoReadPileup().size(); + nFrags += new FragmentPileup(rbp, algorithm).getTwoReadPileup().size(); } } - public void timeFastNameMatch(int rep) { - int nFrags = 0; - for ( int i = 0; i < rep; i++ ) - for ( ReadBackedPileup rbp : pileups ) - nFrags += new FragmentPileup(rbp, false).getTwoReadPileup().size(); + public void timeOriginal(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.ORIGINAL); + } + + public void timeFullOverlapPotential(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.FAST_V1); + } + + public void timeSkipNonOverlapping(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlapping); + } + + public void timeSkipNonOverlappingNotLazy(int rep) { + run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlappingNotLazy); } public static void main(String[] args) {