From 9f519af06d7a64fed078732cd11947210153a1af Mon Sep 17 00:00:00 2001
From: kcibul
Date: Mon, 15 Mar 2010 15:40:09 +0000
Subject: [PATCH] new method to filter out overlapping PE reads

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3002 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../sting/utils/pileup/ReadBackedPileup.java  | 53 +++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
index 06918b8dc..6b99ceb3f 100755
--- a/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
+++ b/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
@@ -173,6 +173,59 @@ public class ReadBackedPileup implements Iterable {
         }
     }
 
+    /**
+     * Returns a new ReadBackedPileup in which at most one element is retained per
+     * read name, so that the two overlapping reads of a pair are not both counted.
+     * Elements are matched purely by read name.  If elements sharing a read name
+     * disagree on the base at this position, none of them is retained, even when
+     * further elements with that name follow.  If they agree on the base, the
+     * element with the highest quality score is retained (the first seen wins ties).
+     *
+     * Elements are collected in a HashMap, so the order of the elements in the
+     * returned pileup is unspecified.
+     *
+     * @return the newly filtered pileup
+     */
+    public ReadBackedPileup getOverlappingFragementFilteredPileup() {
+        // read name -> best element seen so far; a null value marks a read name whose
+        // elements disagreed and must stay excluded for the rest of the scan
+        Map<String, PileupElement> filteredPileup = new HashMap<String, PileupElement>();
+
+        for ( PileupElement p : pileup ) {
+            String readName = p.getRead().getReadName();
+
+            // if we've never seen this read before, life is good
+            if (!filteredPileup.containsKey(readName)) {
+                filteredPileup.put(readName, p);
+                continue;
+            }
+
+            PileupElement existing = filteredPileup.get(readName);
+            if (existing == null) {
+                // this read name was already thrown out; do not let a later element revive it
+                continue;
+            }
+
+            // if the reads disagree at this position, throw them both out.  Otherwise
+            // keep the element with the higher quality score
+            if (existing.getBase() != p.getBase()) {
+                filteredPileup.put(readName, null);
+            } else if (existing.getQual() < p.getQual()) {
+                filteredPileup.put(readName, p);
+            }
+        }
+
+        // drop the null markers left behind by discarded read names
+        ArrayList<PileupElement> retained = new ArrayList<PileupElement>(filteredPileup.size());
+        for ( PileupElement element : filteredPileup.values() ) {
+            if (element != null) {
+                retained.add(element);
+            }
+        }
+
+        return new ReadBackedPileup(loc, retained);
+    }
+
     /**
      * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup. Note that this
      * does not copy the data, so both ReadBackedPileups should not be changed. Doesn't make an unnecessary copy