Removing my unnecessary optimization. 10 lines later in the code the same optimization was applied. A monumental waste of time.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3455 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-05-28 14:10:48 +00:00
parent a4d834cc01
commit cc2bf549c8
2 changed files with 8 additions and 38 deletions

View File

@ -75,9 +75,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
@Argument(fullName="bam_compression", shortName="compress", required=false, doc="Compression level to use for output bams [default:5]")
protected Integer compressionLevel = 5;
@Argument(fullName="cleanPerfectMatches", shortName="cpm", required=false, doc="If true, Realigner will ignore the NM == 0 flag and include reads with supposely no mismatches to reference for cleaning. Useful for malformed BAM files")
protected boolean CLEAN_PERFECT_MATCHES = false;
public enum RealignerSortingStrategy {
NO_SORT,
ON_DISK,
@ -287,29 +284,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
}
}
/**
 * Tells whether a read can be treated as a perfect match to the reference.
 *
 * A read qualifies when its CIGAR consists of exactly one element and it carries
 * an "NM" attribute whose value is 0.  Only the number of CIGAR elements is
 * inspected here; the operator of that single element is not checked.
 * When CLEAN_PERFECT_MATCHES is set, this method always answers false.
 *
 * @param read the read to examine
 * @return true if the read has a one-element CIGAR and NM == 0 (and
 *         CLEAN_PERFECT_MATCHES is off); false otherwise
 */
private boolean perfectlyMatchesReference(SAMRecord read) {
    // the shortcut is disabled entirely when CLEAN_PERFECT_MATCHES is set
    if ( CLEAN_PERFECT_MATCHES ) {
        return false;
    }

    final boolean singleCigarElement = read.getCigar().numCigarElements() == 1;
    final Integer mismatches = read.getIntegerAttribute("NM");

    // a missing NM tag, or a non-zero one, rules the read out
    if ( mismatches == null || mismatches != 0 ) {
        return false;
    }
    return singleCigarElement;
}
int nPerfectMatches = 0;
int nReadsToClean = 0;
long nPerfectMatches = 0;
long nReadsToClean = 0;
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( currentInterval == null ) {
@ -514,17 +490,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
continue;
}
// optimization to avoid trying to clean perfect matches to the reference
if ( perfectlyMatchesReference(read) ) {
refReads.add(read);
nPerfectMatches++;
if ( nPerfectMatches % 10000 == 0 ) {
logger.debug(String.format("Perfect matching fraction: %d %d => %.2f", nPerfectMatches, nReadsToClean, 100.0 * nPerfectMatches / ( nReadsToClean + 1)));
}
continue;
}
final AlignedRead aRead = new AlignedRead(read);
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
@ -562,6 +527,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
}
// otherwise, we can emit it as is
else {
// nPerfectMatches++;
// if ( nPerfectMatches % 1000 == 0 ) {
// logger.info(String.format("Perfect matching fraction: %d %d => %.2f", nPerfectMatches, nReadsToClean, 100.0 * nPerfectMatches / ( nReadsToClean + 1)));
// }
// if ( debugOn ) System.out.println("Emitting as is...");
//logger.debug("Adding " + aRead.getRead().getReadName() + " with raw mismatch score " + rawMismatchScore + " to ref reads");
refReads.add(read);

View File

@ -27,7 +27,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
String filename1 = "NA12878.chrom1.SLX.SRP000032.2009_06";
String filename2 = "low_coverage_CEU.chr1.10k-11k";
WalkerTestSpec spec3 = new WalkerTestSpec(
"-T IndelRealigner --cleanPerfectMatches -nway -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s",
"-T IndelRealigner -nway -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s",
1,
Arrays.asList("bd42a4fa66d7ec7a480c2b94313a78d3"));
File file1 = new File("/tmp/" + filename1 + ".cleaned.bam");