From 5a6892900e88fdbdf5709e416b3300996741228a Mon Sep 17 00:00:00 2001 From: depristo Date: Thu, 7 May 2009 18:55:45 +0000 Subject: [PATCH] fixing oddities in duplicates git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@628 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/traversals/TraverseDuplicates.java | 14 +++++++++++--- .../gatk/walkers/CombineDuplicatesWalker.java | 6 +++++- .../sting/utils/duplicates/DupUtils.java | 1 + 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java index 2cf46508e..c0e195cb7 100755 --- a/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java +++ b/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java @@ -115,7 +115,12 @@ public class TraverseDuplicates extends TraversalEngine { final GenomeLoc readLoc = new GenomeLoc(read); final GenomeLoc readMateLoc = new GenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); if (DEBUG) logger.debug(String.format("Examining reads at %s vs. %s at %s / %s vs. 
%s / %s%n", key.getReadName(), read.getReadName(), keyLoc, keyMateLoc, readLoc, readMateLoc)); - if ( readLoc.compareTo(keyLoc) == 0 && readMateLoc.compareTo(keyMateLoc) == 0 ) { + + // read and key must start at the same place, AND either this read and the key + // share a mate location or the read is flagged as a duplicate, i.e. A && (B || C) + if ( readLoc.compareTo(keyLoc) == 0 && + ( readMateLoc.compareTo(keyMateLoc) == 0 || + read.getDuplicateReadFlag() ) ) { // we are at the same position as the dup and have the same mat pos, it's a dup if (DEBUG) logger.debug(String.format(" => Adding read to dups list: %s%n", read)); dups.add(read); @@ -157,8 +162,11 @@ public class TraverseDuplicates extends TraversalEngine { List uniqueReads = split.getFirst(); List duplicateReads = split.getSecond(); - logger.debug(String.format("*** TraverseDuplicates.traverse at %s has %d unique and %d duplicate reads", - site, uniqueReads.size(), duplicateReads.size())); + logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d reads has %d unique and %d duplicate reads", + site, reads.size(), uniqueReads.size(), duplicateReads.size())); + if ( reads.size() != uniqueReads.size() + duplicateReads.size() ) + throw new RuntimeException(String.format("Bug occurred spliting reads [N=%d] at loc %s into unique [N=%d] and duplicates [N=%d], sizes don't match", + reads.size(), site, uniqueReads.size(), duplicateReads.size())); // Jump forward in the reference to this locus location LocusContext locus = new LocusContext(site, duplicateReads, Arrays.asList(0)); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CombineDuplicatesWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CombineDuplicatesWalker.java index 47669ef09..b5038b10b 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CombineDuplicatesWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CombineDuplicatesWalker.java @@ -92,7 +92,7 @@ public class 
CombineDuplicatesWalker extends DuplicateWalker