From eb74b16e39d134e2d1cc8ca8fd9be4133e0d7fc1 Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 26 Jun 2009 18:29:00 +0000 Subject: [PATCH] updated what constitutes removing entropy git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1113 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/indels/IntervalCleanerWalker.java | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java index c9b0d1f6b..8cd306dc7 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/indels/IntervalCleanerWalker.java @@ -40,6 +40,9 @@ public class IntervalCleanerWalker extends LocusWindowWalker public static final int MAX_QUAL = 99; + // fraction of mismatches that need to no longer mismatch for a column to be considered cleaned + private static final double MISMATCH_COLUMN_CLEANED_FRACTION = 0.75; + private SAMFileWriter writer = null; private FileWriter indelOutput = null; private FileWriter statsOutput = null; @@ -577,22 +580,25 @@ public class IntervalCleanerWalker extends LocusWindowWalker for ( int i=0; i < reference.length(); i++ ) { if ( cleanedMismatchBases[i] == originalMismatchBases[i] ) continue; - if ( originalMismatchBases[i] > totalBases[i] * MISMATCH_THRESHOLD ) + boolean didMismatch = false, stillMismatches = false; + if ( originalMismatchBases[i] > totalBases[i] * MISMATCH_THRESHOLD ) { + didMismatch = true; originalMismatchColumns++; - if ( cleanedMismatchBases[i] > totalBases[i] * MISMATCH_THRESHOLD ) + if ( cleanedMismatchBases[i] > originalMismatchBases[i] * (1.0 - MISMATCH_COLUMN_CLEANED_FRACTION) ) { + stillMismatches = true; + cleanedMismatchColumns++; + } + } else if ( cleanedMismatchBases[i] > totalBases[i] * MISMATCH_THRESHOLD ) { cleanedMismatchColumns++; + } if ( snpsOutput != null ) { - if ( originalMismatchBases[i] > totalBases[i] * MISMATCH_THRESHOLD ) { + if ( didMismatch ) { sb.append(reads.get(0).getRead().getReferenceName() + ":"); sb.append(((int)leftmostIndex + i)); - if ( cleanedMismatchBases[i] > totalBases[i] * MISMATCH_THRESHOLD ) + if ( stillMismatches ) sb.append(" SAME_SNP\n"); else sb.append(" NOT_SNP\n"); - //} else if ( cleanedMismatchBases[i] > totalBases[i] * MISMATCH_THRESHOLD ) { - // sb.append(reads.get(0).getRead().getReferenceName() + ":"); - // sb.append(((int)leftmostIndex + i)); - // sb.append(" NEW_SNP\n"); } } }