Removing my unnecessary optimization. 10 lines later in the code the same optimization was applied. A monumental waste of time.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3455 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a4d834cc01
commit
cc2bf549c8
|
|
@ -75,9 +75,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
@Argument(fullName="bam_compression", shortName="compress", required=false, doc="Compression level to use for output bams [default:5]")
|
||||
protected Integer compressionLevel = 5;
|
||||
|
||||
@Argument(fullName="cleanPerfectMatches", shortName="cpm", required=false, doc="If true, Realigner will ignore the NM == 0 flag and include reads with supposely no mismatches to reference for cleaning. Useful for malformed BAM files")
|
||||
protected boolean CLEAN_PERFECT_MATCHES = false;
|
||||
|
||||
public enum RealignerSortingStrategy {
|
||||
NO_SORT,
|
||||
ON_DISK,
|
||||
|
|
@ -287,29 +284,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if a read has only a single cigar element (taken to indicate it is xM), it has an NM
|
||||
* attribute, and NM == 0. When CLEAN_PERFECT_MATCHES is set this check is bypassed and false is always returned.
|
||||
* @param read the read whose cigar and NM attribute are inspected
|
||||
* @return true if the read has exactly one cigar element and an NM attribute equal to 0; false otherwise, and always false when CLEAN_PERFECT_MATCHES is true
|
||||
*/
|
||||
private boolean perfectlyMatchesReference(SAMRecord read) {
|
||||
if ( CLEAN_PERFECT_MATCHES ) {
|
||||
return false;
|
||||
} else {
|
||||
boolean cigarIsMatches = read.getCigar().numCigarElements() == 1;
|
||||
Integer NM = read.getIntegerAttribute("NM");
|
||||
// NM is null-checked here, so a read without the attribute is never reported as a perfect match
boolean noMM = NM != null && NM == 0;
|
||||
boolean perfectMatch = cigarIsMatches && noMM;
|
||||
// if ( perfectMatch ) {
|
||||
// System.out.println("Perfect match " + read.format());
|
||||
// }
|
||||
return perfectMatch;
|
||||
}
|
||||
}
|
||||
|
||||
int nPerfectMatches = 0;
|
||||
int nReadsToClean = 0;
|
||||
long nPerfectMatches = 0;
|
||||
long nReadsToClean = 0;
|
||||
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
if ( currentInterval == null ) {
|
||||
|
|
@ -514,17 +490,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
continue;
|
||||
}
|
||||
|
||||
// optimization to avoid trying to clean perfect matches to the reference
|
||||
if ( perfectlyMatchesReference(read) ) {
|
||||
refReads.add(read);
|
||||
nPerfectMatches++;
|
||||
if ( nPerfectMatches % 10000 == 0 ) {
|
||||
logger.debug(String.format("Perfect matching fraction: %d %d => %.2f", nPerfectMatches, nReadsToClean, 100.0 * nPerfectMatches / ( nReadsToClean + 1)));
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
final AlignedRead aRead = new AlignedRead(read);
|
||||
|
||||
// first, move existing indels (for 1 indel reads only) to leftmost position within identical sequence
|
||||
|
|
@ -562,6 +527,11 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
}
|
||||
// otherwise, we can emit it as is
|
||||
else {
|
||||
// nPerfectMatches++;
|
||||
// if ( nPerfectMatches % 1000 == 0 ) {
|
||||
// logger.info(String.format("Perfect matching fraction: %d %d => %.2f", nPerfectMatches, nReadsToClean, 100.0 * nPerfectMatches / ( nReadsToClean + 1)));
|
||||
// }
|
||||
|
||||
// if ( debugOn ) System.out.println("Emitting as is...");
|
||||
//logger.debug("Adding " + aRead.getRead().getReadName() + " with raw mismatch score " + rawMismatchScore + " to ref reads");
|
||||
refReads.add(read);
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
String filename1 = "NA12878.chrom1.SLX.SRP000032.2009_06";
|
||||
String filename2 = "low_coverage_CEU.chr1.10k-11k";
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
"-T IndelRealigner --cleanPerfectMatches -nway -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s",
|
||||
"-T IndelRealigner -nway -noPG -LOD 5 -maxConsensuses 100 -greedy 100 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + filename1 + ".bam -I " + validationDataLocation + filename2 + ".bam -L 1:10023900-10024000 -compress 1 -targetIntervals " + validationDataLocation + "cleaner.test.intervals -O /tmp -snps %s",
|
||||
1,
|
||||
Arrays.asList("bd42a4fa66d7ec7a480c2b94313a78d3"));
|
||||
File file1 = new File("/tmp/" + filename1 + ".cleaned.bam");
|
||||
|
|
|
|||
Loading…
Reference in New Issue