diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 125dc0fa0..3c881442a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -75,9 +75,6 @@ public class IndelRealigner extends ReadWalker { @Argument(fullName="bam_compression", shortName="compress", required=false, doc="Compression level to use for output bams [default:5]") protected Integer compressionLevel = 5; - @Argument(fullName="cleanPerfectMatches", shortName="cpm", required=false, doc="If true, Realigner will ignore the NM == 0 flag and include reads with supposely no mismatches to reference for cleaning. Useful for malformed BAM files") - protected boolean CLEAN_PERFECT_MATCHES = false; - public enum RealignerSortingStrategy { NO_SORT, ON_DISK, @@ -287,29 +284,8 @@ public class IndelRealigner extends ReadWalker { } } - /** - * returns true if a read has only a single cigar element (indicating its xM) and it has a NM flag - * and NM == 0. - * @param read - * @return - */ - private boolean perfectlyMatchesReference(SAMRecord read) { - if ( CLEAN_PERFECT_MATCHES ) { - return false; - } else { - boolean cigarIsMatches = read.getCigar().numCigarElements() == 1; - Integer NM = read.getIntegerAttribute("NM"); - boolean noMM = NM != null && NM == 0; - boolean perfectMatch = cigarIsMatches && noMM; -// if ( perfectMatch ) { -// System.out.println("Perfect match " + read.format()); -// } - return perfectMatch; - } - } - - int nPerfectMatches = 0; - int nReadsToClean = 0; + long nPerfectMatches = 0; + long nReadsToClean = 0; public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { if ( currentInterval == null ) { @@ -514,17 +490,6 @@ public class IndelRealigner extends ReadWalker { continue; } - // optimization to avoid trying to clean perfect matches to the reference - if ( perfectlyMatchesReference(read) ) { - refReads.add(read); - nPerfectMatches++; - if ( nPerfectMatches % 10000 == 0 ) { - logger.debug(String.format("Perfect matching fraction: %d %d => %.2f", nPerfectMatches, nReadsToClean, 100.0 * nPerfectMatches / ( nReadsToClean + 1))); - } - - continue; - } - final AlignedRead aRead = new AlignedRead(read); // first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence @@ -562,6 +527,11 @@ public class IndelRealigner extends ReadWalker { } // otherwise, we can emit it as is else { +// nPerfectMatches++; +// if ( nPerfectMatches % 1000 == 0 ) { +// logger.info(String.format("Perfect matching fraction: %d %d => %.2f", nPerfectMatches, nReadsToClean, 100.0 * nPerfectMatches / ( nReadsToClean + 1))); +// } + // if ( debugOn ) System.out.println("Emitting as is..."); //logger.debug("Adding " + aRead.getRead().getReadName() + " with raw mismatch score " + rawMismatchScore + " to ref reads"); refReads.add(read); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 3f4c602d6..5f3b795f7 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -27,7 +27,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { String filename1 = "NA12878.chrom1.SLX.SRP000032.2009_06"; String filename2 = "low_coverage_CEU.chr1.10k-11k"; WalkerTestSpec spec3 = new WalkerTestSpec( - "-T IndelRealigner --cleanPerfectMatches -nway -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s", + "-T IndelRealigner -nway -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s", 1, Arrays.asList("bd42a4fa66d7ec7a480c2b94313a78d3")); File file1 = new File("/tmp/" + filename1 + ".cleaned.bam");