From 7fa943aef14c55797460170f663b70ef89aeece3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 26 Oct 2011 14:01:45 -0400 Subject: [PATCH 1/5] Renamed FragmentPileup to FragmentUtils --- .../genotyper/DiploidSNPGenotypeLikelihoods.java | 8 ++++---- .../{pileup/FragmentPileup.java => FragmentUtils.java} | 10 ++++++---- .../sting/utils/pileup/FragmentPileupBenchmark.java | 10 +++++----- .../sting/utils/pileup/FragmentPileupUnitTest.java | 5 ++--- 4 files changed, 17 insertions(+), 16 deletions(-) rename public/java/src/org/broadinstitute/sting/utils/{pileup/FragmentPileup.java => FragmentUtils.java} (94%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index 71eea2467..6b57bfc42 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -27,10 +27,10 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.FragmentUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.FragmentPileup; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -259,12 +259,12 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { int n = 0; // for each fragment, add to the likelihoods - FragmentPileup fpile = new FragmentPileup(pileup); + FragmentUtils fpile = new FragmentUtils(pileup); for ( PileupElement p : fpile.getOneReadPileup() ) n += add(p, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); - for ( FragmentPileup.TwoReadPileupElement twoRead : fpile.getTwoReadPileup() ) + for ( FragmentUtils.TwoReadPileupElement twoRead : fpile.getTwoReadPileup() ) n += add(twoRead, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); return n; @@ -286,7 +286,7 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { } } - public int add(FragmentPileup.TwoReadPileupElement twoRead, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) { + public int add(FragmentUtils.TwoReadPileupElement twoRead, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) { final byte observedBase1 = twoRead.getFirst().getBase(); final byte qualityScore1 = qualToUse(twoRead.getFirst(), ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); final byte observedBase2 = twoRead.getSecond().getBase(); diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java b/public/java/src/org/broadinstitute/sting/utils/FragmentUtils.java similarity index 94% rename from public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java rename to public/java/src/org/broadinstitute/sting/utils/FragmentUtils.java index 4eda7c7cd..60367e42a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/FragmentPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/FragmentUtils.java @@ -1,6 +1,8 @@ -package org.broadinstitute.sting.utils.pileup; +package org.broadinstitute.sting.utils; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; @@ -23,7 +25,7 @@ import java.util.*; * Date: 3/26/11 * Time: 10:09 PM */ -public class FragmentPileup { +public class FragmentUtils { Collection oneReadPile = null; Collection twoReadPile = null; @@ -36,12 +38,12 @@ public class FragmentPileup { * Create a new Fragment-based pileup from the standard read-based pileup * @param pileup */ - public FragmentPileup(ReadBackedPileup pileup) { + public FragmentUtils(ReadBackedPileup pileup) { skipNonOverlapping(pileup); } /** For performance testing only */ - protected FragmentPileup(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) { + protected FragmentUtils(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) { switch ( algorithm ) { case ORIGINAL: oldSlowCalculation(pileup); break; case skipNonOverlapping: skipNonOverlapping(pileup); break; diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java index 8b797def4..60ed358c8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java @@ -28,7 +28,7 @@ import com.google.caliper.Param; import com.google.caliper.SimpleBenchmark; import com.google.caliper.runner.CaliperMain; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.FragmentUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; @@ -62,20 +62,20 @@ public class FragmentPileupBenchmark extends SimpleBenchmark { } } - private void run(int rep, FragmentPileup.FragmentMatchingAlgorithm algorithm) { + private void run(int rep, FragmentUtils.FragmentMatchingAlgorithm algorithm) { int nFrags = 0; for ( int i = 0; i < rep; i++ ) { for ( ReadBackedPileup rbp : pileups ) - nFrags += new FragmentPileup(rbp, algorithm).getTwoReadPileup().size(); + nFrags += new FragmentUtils(rbp, algorithm).getTwoReadPileup().size(); } } public void timeOriginal(int rep) { - run(rep, FragmentPileup.FragmentMatchingAlgorithm.ORIGINAL); + run(rep, FragmentUtils.FragmentMatchingAlgorithm.ORIGINAL); } public void timeSkipNonOverlapping(int rep) { - run(rep, FragmentPileup.FragmentMatchingAlgorithm.skipNonOverlapping); + run(rep, FragmentUtils.FragmentMatchingAlgorithm.skipNonOverlapping); } public static void main(String[] args) { diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java index c42c01c65..431777a42 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java @@ -25,10 +25,9 @@ package org.broadinstitute.sting.utils.pileup; import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.FragmentUtils; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.testng.Assert; import org.testng.annotations.BeforeTest; @@ -113,7 +112,7 @@ public class FragmentPileupUnitTest extends BaseTest { public void testMe(FragmentPileupTest test) { for ( TestState testState : test.states ) { ReadBackedPileup rbp = testState.pileup; - FragmentPileup fp = new FragmentPileup(rbp); + FragmentUtils fp = new FragmentUtils(rbp); Assert.assertEquals(fp.getTwoReadPileup().size(), testState.shouldBeFragment ? 1 : 0); Assert.assertEquals(fp.getOneReadPileup().size(), testState.shouldBeFragment ? 0 : 1); } From 034a997d07914146869b9289090f56d74da22cb2 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 26 Oct 2011 15:31:24 -0400 Subject: [PATCH 2/5] Generalized Reads -> Fragment calculation -- Supports ReadBackedPileup -> FragmentCollection as before -- Added support for List -> FragmentCollection for Ryan's haplotype caller -- General cleanup, renaming, move to separate package, more extensive unit tests, etc. -- Added toFragment() function to ReadBackedPileup interface --- .../DiploidSNPGenotypeLikelihoods.java | 27 +-- .../sting/utils/FragmentUtils.java | 155 ----------------- .../utils/fragments/FragmentCollection.java | 66 +++++++ .../sting/utils/fragments/FragmentUtils.java | 123 +++++++++++++ .../pileup/AbstractReadBackedPileup.java | 7 + .../sting/utils/pileup/ReadBackedPileup.java | 6 + .../FragmentUtilsBenchmark.java} | 24 ++- .../fragments/FragmentUtilsUnitTest.java | 164 ++++++++++++++++++ .../utils/pileup/FragmentPileupUnitTest.java | 125 ------------- 9 files changed, 393 insertions(+), 304 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/utils/FragmentUtils.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/fragments/FragmentCollection.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java rename public/java/test/org/broadinstitute/sting/utils/{pileup/FragmentPileupBenchmark.java => fragments/FragmentUtilsBenchmark.java} (81%) create mode 100644 public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java delete mode 100644 public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index 6b57bfc42..525be9cb0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -27,13 +27,16 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import net.sf.samtools.SAMUtils; import org.broadinstitute.sting.utils.BaseUtils; -import org.broadinstitute.sting.utils.FragmentUtils; +import org.broadinstitute.sting.utils.fragments.FragmentCollection; +import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import java.util.List; + import static java.lang.Math.log10; import static java.lang.Math.pow; @@ -259,16 +262,17 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { int n = 0; // for each fragment, add to the likelihoods - FragmentUtils fpile = new FragmentUtils(pileup); + FragmentCollection fpile = pileup.toFragments(); - for ( PileupElement p : fpile.getOneReadPileup() ) + for ( PileupElement p : fpile.getSingletonReads() ) n += add(p, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); - for ( FragmentUtils.TwoReadPileupElement twoRead : fpile.getTwoReadPileup() ) - n += add(twoRead, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); + for ( List overlappingPair : fpile.getOverlappingPairs() ) + n += add(overlappingPair, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); return n; } + public int add(PileupElement elt, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) { byte obsBase = elt.getBase(); @@ -286,11 +290,14 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable { } } - public int add(FragmentUtils.TwoReadPileupElement twoRead, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) { - final byte observedBase1 = twoRead.getFirst().getBase(); - final byte qualityScore1 = qualToUse(twoRead.getFirst(), ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); - final byte observedBase2 = twoRead.getSecond().getBase(); - final byte qualityScore2 = qualToUse(twoRead.getSecond(), ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); + public int add(List overlappingPair, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) { + final PileupElement p1 = overlappingPair.get(0); + final PileupElement p2 = overlappingPair.get(1); + + final byte observedBase1 = p1.getBase(); + final byte qualityScore1 = qualToUse(p1, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); + final byte observedBase2 = p2.getBase(); + final byte qualityScore2 = qualToUse(p2, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual); if ( qualityScore1 == 0 ) { if ( qualityScore2 == 0 ) // abort early if we didn't see any good bases diff --git a/public/java/src/org/broadinstitute/sting/utils/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/FragmentUtils.java deleted file mode 100644 index 60367e42a..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/FragmentUtils.java +++ /dev/null @@ -1,155 +0,0 @@ -package org.broadinstitute.sting.utils; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; - -import java.util.*; - -/** - * An easy to access fragment-based pileup, which contains two separate pileups. The first - * is a regular collection of PileupElements containing all of the reads in the original RBP - * that uniquely info about a fragment. The second are TwoReadPileupElements that, as the - * name suggests, contain two reads that are sequenced from the same underlying fragment. - * - * Based on the original code by E. Banks - * - * Oct 21: note that the order of the oneReadPileup and twoReadPileups are not - * defined. The algorithms that produce these lists are in fact producing - * lists of Pileup elements *NOT* sorted by alignment start position of the underlying - * reads. - * - * User: depristo - * Date: 3/26/11 - * Time: 10:09 PM - */ -public class FragmentUtils { - Collection oneReadPile = null; - Collection twoReadPile = null; - - protected enum FragmentMatchingAlgorithm { - ORIGINAL, - skipNonOverlapping, - } - - /** - * Create a new Fragment-based pileup from the standard read-based pileup - * @param pileup - */ - public FragmentUtils(ReadBackedPileup pileup) { - skipNonOverlapping(pileup); - } - - /** For performance testing only */ - protected FragmentUtils(ReadBackedPileup pileup, FragmentMatchingAlgorithm algorithm) { - switch ( algorithm ) { - case ORIGINAL: oldSlowCalculation(pileup); break; - case skipNonOverlapping: skipNonOverlapping(pileup); break; - } - } - - private final void oldSlowCalculation(final ReadBackedPileup pileup) { - final Map nameMap = new HashMap(pileup.size()); - - // build an initial map, grabbing all of the multi-read fragments - for ( final PileupElement p : pileup ) { - final String readName = p.getRead().getReadName(); - - final PileupElement pe1 = nameMap.get(readName); - if ( pe1 != null ) { - // assumes we have at most 2 reads per fragment - if ( twoReadPile == null ) twoReadPile = new ArrayList(); - twoReadPile.add(new TwoReadPileupElement(pe1, p)); - nameMap.remove(readName); - } else { - nameMap.put(readName, p); - } - } - - oneReadPile = nameMap.values(); - } - - private final void skipNonOverlapping(final ReadBackedPileup pileup) { - Map nameMap = null; - - // build an initial map, grabbing all of the multi-read fragments - for ( final PileupElement p : pileup ) { - final SAMRecord read = p.getRead(); - final int mateStart = read.getMateAlignmentStart(); - - if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) { - // if we know that this read won't overlap its mate, or doesn't have one, jump out early - if ( oneReadPile == null ) oneReadPile = new ArrayList(pileup.size()); // lazy init - oneReadPile.add(p); - } else { - // the read might overlap it's mate, or is the rightmost read of a pair - final String readName = p.getRead().getReadName(); - final PileupElement pe1 = nameMap == null ? null : nameMap.get(readName); - if ( pe1 != null ) { - // assumes we have at most 2 reads per fragment - if ( twoReadPile == null ) twoReadPile = new ArrayList(); // lazy init - twoReadPile.add(new TwoReadPileupElement(pe1, p)); - nameMap.remove(readName); - } else { - if ( nameMap == null ) nameMap = new HashMap(pileup.size()); // lazy init - nameMap.put(readName, p); - } - } - } - - // add all of the reads that are potentially overlapping but whose mate never showed - // up to the oneReadPile - if ( nameMap != null && ! nameMap.isEmpty() ) { - if ( oneReadPile == null ) - oneReadPile = nameMap.values(); - else - oneReadPile.addAll(nameMap.values()); - } - } - - /** - * Gets the pileup elements containing two reads, in no particular order - * - * @return - */ - public Collection getTwoReadPileup() { - return twoReadPile == null ? Collections.emptyList() : twoReadPile; - } - - /** - * Gets the pileup elements containing one read, in no particular order - * - * @return - */ - public Collection getOneReadPileup() { - return oneReadPile == null ? Collections.emptyList() : oneReadPile; - } - - /** - * Useful helper class to represent a full read pair at a position - * - * User: ebanks, depristo - * Date: Jan 10, 2011 - */ - public static class TwoReadPileupElement { - final protected PileupElement PE1, PE2; - - /** - * Creates a fragment element that contains both ends of a paired end read - * @param PE1 - * @param PE2 - */ - public TwoReadPileupElement(PileupElement PE1, PileupElement PE2) { - this.PE1 = PE1; - this.PE2 = PE2; - } - - /** Returns the first pileup element -- never null */ - public PileupElement getFirst() { return PE1; } - - /** Returns the second read in this fragment element. May be null */ - public PileupElement getSecond() { return PE2; } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentCollection.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentCollection.java new file mode 100644 index 000000000..3261e8d2e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentCollection.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.fragments; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +/** + * Useful helper class to represent the results of the reads -> fragment calculation. + * + * Contains singleton -- objects whose underlying reads do not overlap their mate pair + * Contains overlappingPairs -- objects whose underlying reads do overlap their mate pair + * + * User: ebanks, depristo + * Date: Jan 10, 2011 + */ +public class FragmentCollection { + Collection singletons; + Collection> overlappingPairs; + + public FragmentCollection(final Collection singletons, final Collection> overlappingPairs) { + this.singletons = singletons == null ? Collections.emptyList() : singletons; + this.overlappingPairs = overlappingPairs == null ? Collections.>emptyList() : overlappingPairs; + } + + /** + * Gets the T elements not containing overlapping elements, in no particular order + * + * @return + */ + public Collection getSingletonReads() { + return singletons; + } + + /** + * Gets the T elements containing overlapping elements, in no particular order + * + * @return + */ + public Collection> getOverlappingPairs() { + return overlappingPairs; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java new file mode 100644 index 000000000..83961021a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -0,0 +1,123 @@ +package org.broadinstitute.sting.utils.fragments; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; + +import java.util.*; + +/** + * An easy to access fragment-based pileup, which contains two separate pileups. The first + * is a regular collection of PileupElements containing all of the reads in the original RBP + * that uniquely info about a fragment. The second are TwoReadPileupElements that, as the + * name suggests, contain two reads that are sequenced from the same underlying fragment. + * + * Based on the original code by E. Banks + * + * Oct 21: note that the order of the oneReadPileup and twoReadPileups are not + * defined. The algorithms that produce these lists are in fact producing + * lists of Pileup elements *NOT* sorted by alignment start position of the underlying + * reads. + * + * User: depristo + * Date: 3/26/11 + * Time: 10:09 PM + */ +public class FragmentUtils { + private FragmentUtils() {} // private constructor + + /** + * A getter function that takes an Object of type T and returns its associated SAMRecord. + * + * Allows us to write a generic T -> Fragment algorithm that works with any object containing + * a read. + * + * @param + */ + public interface ReadGetter { + public SAMRecord get(T object); + } + + /** Identify getter for SAMRecords themselves */ + private final static ReadGetter SamRecordGetter = new ReadGetter() { + @Override public SAMRecord get(final SAMRecord object) { return object; } + }; + + /** Gets the SAMRecord in a PileupElement */ + private final static ReadGetter PileupElementGetter = new ReadGetter() { + @Override public SAMRecord get(final PileupElement object) { return object.getRead(); } + }; + + + /** + * Generic algorithm that takes an iterable over T objects, a getter routine to extract the reads in T, + * and returns a FragmentCollection that contains the T objects whose underlying reads either overlap (or + * not) with their mate pairs. + * + * @param readContainingObjects + * @param nElements + * @param getter + * @param + * @return + */ + private final static FragmentCollection create(Iterable readContainingObjects, int nElements, ReadGetter getter) { + Collection singletons = null; + Collection> overlapping = null; + Map nameMap = null; + + int lastStart = -1; + + // build an initial map, grabbing all of the multi-read fragments + for ( final T p : readContainingObjects ) { + final SAMRecord read = getter.get(p); + + if ( read.getAlignmentStart() < lastStart ) { + throw new IllegalArgumentException(String.format( + "FragmentUtils.create assumes that the incoming objects are ordered by " + + "SAMRecord alignment start, but saw a read %s with alignment start " + + "%d before the previous start %d", read.getSAMString(), read.getAlignmentStart(), lastStart)); + } + lastStart = read.getAlignmentStart(); + + final int mateStart = read.getMateAlignmentStart(); + if ( mateStart == 0 || mateStart > read.getAlignmentEnd() ) { + // if we know that this read won't overlap its mate, or doesn't have one, jump out early + if ( singletons == null ) singletons = new ArrayList(nElements); // lazy init + singletons.add(p); + } else { + // the read might overlap it's mate, or is the rightmost read of a pair + final String readName = read.getReadName(); + final T pe1 = nameMap == null ? null : nameMap.get(readName); + if ( pe1 != null ) { + // assumes we have at most 2 reads per fragment + if ( overlapping == null ) overlapping = new ArrayList>(); // lazy init + overlapping.add(Arrays.asList(pe1, p)); + nameMap.remove(readName); + } else { + if ( nameMap == null ) nameMap = new HashMap(nElements); // lazy init + nameMap.put(readName, p); + } + } + } + + // add all of the reads that are potentially overlapping but whose mate never showed + // up to the oneReadPile + if ( nameMap != null && ! nameMap.isEmpty() ) { + if ( singletons == null ) + singletons = nameMap.values(); + else + singletons.addAll(nameMap.values()); + } + + return new FragmentCollection(singletons, overlapping); + } + + public final static FragmentCollection create(ReadBackedPileup rbp) { + return create(rbp, rbp.size(), PileupElementGetter); + } + + public final static FragmentCollection create(List reads) { + return create(reads, reads.size(), SamRecordGetter); + } + +} diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java index b3f2bc6b0..bec27af38 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java @@ -29,6 +29,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.fragments.FragmentCollection; +import org.broadinstitute.sting.utils.fragments.FragmentUtils; import java.util.*; @@ -871,5 +873,10 @@ public abstract class AbstractReadBackedPileup toFragments() { + return FragmentUtils.create(this); + } } + diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index 3d30aa11b..fd04b455a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.pileup; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.HasGenomeLocation; +import org.broadinstitute.sting.utils.fragments.FragmentCollection; import java.util.Collection; import java.util.List; @@ -222,4 +223,9 @@ public interface ReadBackedPileup extends Iterable, HasGenomeLoca */ public byte[] getMappingQuals(); + /** + * Converts this pileup into a FragmentCollection (see FragmentUtils for documentation) + * @return + */ + public FragmentCollection toFragments(); } diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsBenchmark.java similarity index 81% rename from public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java rename to public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsBenchmark.java index 60ed358c8..2771a7e45 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsBenchmark.java @@ -22,15 +22,15 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.pileup; +package org.broadinstitute.sting.utils.fragments; import com.google.caliper.Param; import com.google.caliper.SimpleBenchmark; import com.google.caliper.runner.CaliperMain; import net.sf.samtools.SAMFileHeader; -import org.broadinstitute.sting.utils.FragmentUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import java.util.*; @@ -38,7 +38,7 @@ import java.util.*; /** * Caliper microbenchmark of fragment pileup */ -public class FragmentPileupBenchmark extends SimpleBenchmark { +public class FragmentUtilsBenchmark extends SimpleBenchmark { List pileups; @Param({"0", "4", "30", "150", "1000"}) @@ -62,23 +62,19 @@ public class FragmentPileupBenchmark extends SimpleBenchmark { } } - private void run(int rep, FragmentUtils.FragmentMatchingAlgorithm algorithm) { +// public void timeOriginal(int rep) { +// run(rep, FragmentUtils.FragmentMatchingAlgorithm.ORIGINAL); +// } + + public void timeSkipNonOverlapping(int rep) { int nFrags = 0; for ( int i = 0; i < rep; i++ ) { for ( ReadBackedPileup rbp : pileups ) - nFrags += new FragmentUtils(rbp, algorithm).getTwoReadPileup().size(); + nFrags += FragmentUtils.create(rbp).getOverlappingPairs().size(); } } - public void timeOriginal(int rep) { - run(rep, FragmentUtils.FragmentMatchingAlgorithm.ORIGINAL); - } - - public void timeSkipNonOverlapping(int rep) { - run(rep, FragmentUtils.FragmentMatchingAlgorithm.skipNonOverlapping); - } - public static void main(String[] args) { - CaliperMain.main(FragmentPileupBenchmark.class, args); + CaliperMain.main(FragmentUtilsBenchmark.class, args); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java new file mode 100644 index 000000000..b4e6ad0ad --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java @@ -0,0 +1,164 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.fragments; + +import net.sf.samtools.SAMFileHeader; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.pileup.PileupElement; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; +import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Test routines for read-backed pileup. + */ +public class FragmentUtilsUnitTest extends BaseTest { + private static SAMFileHeader header; + + private class FragmentUtilsTest extends TestDataProvider { + List statesForPileup = new ArrayList(); + List statesForReads = new ArrayList(); + + private FragmentUtilsTest(String name, int readLen, int leftStart, int rightStart, + boolean leftIsFirst, boolean leftIsNegative) { + super(FragmentUtilsTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative)); + + List pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); + SAMRecord left = pair.get(0); + SAMRecord right = pair.get(1); + + for ( int pos = leftStart; pos < rightStart + readLen; pos++) { + boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd(); + boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd(); + + if ( posCoveredByLeft || posCoveredByRight ) { + List reads = new ArrayList(); + List offsets = new ArrayList(); + + if ( posCoveredByLeft ) { + reads.add(left); + offsets.add(pos - left.getAlignmentStart()); + } + + if ( posCoveredByRight ) { + reads.add(right); + offsets.add(pos - right.getAlignmentStart()); + } + + boolean shouldBeFragment = posCoveredByLeft && posCoveredByRight; + ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets); + TestState testState = new TestState(shouldBeFragment ? 0 : 1, shouldBeFragment ? 1 : 0, pileup, null); + statesForPileup.add(testState); + } + + TestState testState = left.getAlignmentEnd() >= right.getAlignmentStart() ? new TestState(0, 1, null, pair) : new TestState(2, 0, null, pair); + statesForReads.add(testState); + } + } + } + + private class TestState { + int expectedSingletons, expectedPairs; + ReadBackedPileup pileup; + List rawReads; + + private TestState(final int expectedSingletons, final int expectedPairs, final ReadBackedPileup pileup, final List rawReads) { + this.expectedSingletons = expectedSingletons; + this.expectedPairs = expectedPairs; + this.pileup = pileup; + this.rawReads = rawReads; + } + } + + @DataProvider(name = "fragmentUtilsTest") + public Object[][] createTests() { + for ( boolean leftIsFirst : Arrays.asList(true, false) ) { + for ( boolean leftIsNegative : Arrays.asList(true, false) ) { + // Overlapping pair + // ----> [first] + // <--- [second] + new FragmentUtilsTest("overlapping-pair", 10, 1, 5, leftIsFirst, leftIsNegative); + + // Non-overlapping pair + // ----> + // <---- + new FragmentUtilsTest("nonoverlapping-pair", 10, 1, 15, leftIsFirst, leftIsNegative); + } + } + + return FragmentUtilsTest.getTests(FragmentUtilsTest.class); + } + + @Test(enabled = true, dataProvider = "fragmentUtilsTest") + public void testAsPileup(FragmentUtilsTest test) { + for ( TestState testState : test.statesForPileup ) { + ReadBackedPileup rbp = testState.pileup; + FragmentCollection fp = FragmentUtils.create(rbp); + Assert.assertEquals(fp.getOverlappingPairs().size(), testState.expectedPairs); + Assert.assertEquals(fp.getSingletonReads().size(), testState.expectedSingletons); + } + } + + @Test(enabled = true, dataProvider = "fragmentUtilsTest") + public void testAsListOfReadsFromPileup(FragmentUtilsTest test) { + for ( TestState testState : test.statesForPileup ) { + FragmentCollection fp = FragmentUtils.create(testState.pileup.getReads()); + Assert.assertEquals(fp.getOverlappingPairs().size(), testState.expectedPairs); + Assert.assertEquals(fp.getSingletonReads().size(), testState.expectedSingletons); + } + } + + @Test(enabled = true, dataProvider = "fragmentUtilsTest") + public void testAsListOfReads(FragmentUtilsTest test) { + for ( TestState testState : test.statesForReads ) { + FragmentCollection fp = FragmentUtils.create(testState.rawReads); + Assert.assertEquals(fp.getOverlappingPairs().size(), testState.expectedPairs); + Assert.assertEquals(fp.getSingletonReads().size(), testState.expectedSingletons); + } + } + + @Test(enabled = true, expectedExceptions = IllegalArgumentException.class) + public void testOutOfOrder() { + final List pair = ArtificialSAMUtils.createPair(header, "readpair", 100, 1, 50, true, true); + final SAMRecord left = pair.get(0); + final SAMRecord right = pair.get(1); + final List reads = Arrays.asList(right, left); // OUT OF ORDER! + final List offsets = Arrays.asList(0, 50); + final ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets); + FragmentUtils.create(pileup); // should throw exception + } + + @BeforeTest + public void setup() { + header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000); + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java deleted file mode 100644 index 431777a42..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/FragmentPileupUnitTest.java +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2011, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.pileup; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.FragmentUtils; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.testng.Assert; -import org.testng.annotations.BeforeTest; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - -/** - * Test routines for read-backed pileup. - */ -public class FragmentPileupUnitTest extends BaseTest { - private static SAMFileHeader header; - - private class FragmentPileupTest extends TestDataProvider { - List states = new ArrayList(); - - private FragmentPileupTest(String name, int readLen, int leftStart, int rightStart, boolean leftIsFirst, boolean leftIsNegative) { - super(FragmentPileupTest.class, String.format("%s-leftIsFirst:%b-leftIsNegative:%b", name, leftIsFirst, leftIsNegative)); - - List pair = ArtificialSAMUtils.createPair(header, "readpair", readLen, leftStart, rightStart, leftIsFirst, leftIsNegative); - SAMRecord left = pair.get(0); - SAMRecord right = pair.get(1); - - for ( int pos = leftStart; pos < rightStart + readLen; pos++) { - boolean posCoveredByLeft = pos >= left.getAlignmentStart() && pos <= left.getAlignmentEnd(); - boolean posCoveredByRight = pos >= right.getAlignmentStart() && pos <= right.getAlignmentEnd(); - - if ( posCoveredByLeft || posCoveredByRight ) { - List reads = new ArrayList(); - List offsets = new ArrayList(); - - if ( posCoveredByLeft ) { - reads.add(left); - offsets.add(pos - left.getAlignmentStart()); - } - - if ( posCoveredByRight ) { - reads.add(right); - offsets.add(pos - right.getAlignmentStart()); - } - - boolean shouldBeFragment = posCoveredByLeft && posCoveredByRight; - ReadBackedPileup pileup = new ReadBackedPileupImpl(null, reads, offsets); - TestState testState = new TestState(shouldBeFragment, pileup); - states.add(testState); - } - } - } - } - - private class TestState { - boolean shouldBeFragment; - ReadBackedPileup pileup; - - private TestState(final boolean shouldBeFragment, final ReadBackedPileup pileup) { - this.shouldBeFragment = shouldBeFragment; - this.pileup = pileup; - } - } - - @DataProvider(name = "fragmentPileupTest") - public Object[][] createTests() { - for ( boolean leftIsFirst : Arrays.asList(true, false) ) { - for ( boolean leftIsNegative : Arrays.asList(true, false) ) { - // Overlapping pair - // ----> [first] - // <--- [second] - new FragmentPileupTest("overlapping-pair", 10, 1, 5, leftIsFirst, leftIsNegative); - - // Non-overlapping pair - // ----> - // <---- - new FragmentPileupTest("nonoverlapping-pair", 10, 1, 15, leftIsFirst, leftIsNegative); - } - } - - return FragmentPileupTest.getTests(FragmentPileupTest.class); - } - - @Test(enabled = true, dataProvider = "fragmentPileupTest") - public void testMe(FragmentPileupTest test) { - for ( TestState testState : test.states ) { - ReadBackedPileup rbp = testState.pileup; - FragmentUtils fp = new FragmentUtils(rbp); - Assert.assertEquals(fp.getTwoReadPileup().size(), testState.shouldBeFragment ? 1 : 0); - Assert.assertEquals(fp.getOneReadPileup().size(), testState.shouldBeFragment ? 0 : 1); - } - } - - @BeforeTest - public void setup() { - header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000); - } -} From 86871bd1e36534eabdac8ff5e3dc371aaad3fef4 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 26 Oct 2011 15:56:41 -0400 Subject: [PATCH 3/5] Throw a UserException in the BQSR when there is no data instead of creating an empty csv file --- .../gatk/walkers/recalibration/CountCovariatesWalker.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java index 1bdb70bdd..88a9668cc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java @@ -526,6 +526,9 @@ public class CountCovariatesWalker extends LocusWalker Date: Wed, 26 Oct 2011 15:34:23 -0400 Subject: [PATCH 5/5] No scatter gather for VQSR or ApplyVQSR. These walkers should not be scatter gatherable. Annotating them accordingly so that Queue doesn't allow a less than knowledgeable user to try and scatter/gather VQSR. --- .../gatk/walkers/variantrecalibration/ApplyRecalibration.java | 3 +++ .../gatk/walkers/variantrecalibration/VariantRecalibrator.java | 3 +++ 2 files changed, 6 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 16f1abf1b..1d5493daf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -32,6 +32,8 @@ import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.PartitionBy; +import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; @@ -84,6 +86,7 @@ import java.util.*; * */ +@PartitionBy(PartitionType.NONE) public class ApplyRecalibration extends RodWalker { ///////////////////////////// diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 89e702b64..d8cc264c2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -29,6 +29,8 @@ import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.PartitionBy; +import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.MathUtils; @@ -94,6 +96,7 @@ import java.util.*; * */ +@PartitionBy(PartitionType.NONE) public class VariantRecalibrator extends RodWalker, ExpandingArrayList> implements TreeReducible> { public static final String VQS_LOD_KEY = "VQSLOD"; // Log odds ratio of being a true variant versus being false under the trained gaussian mixture model