diff --git a/src/markdup/markdup.cpp b/src/markdup/markdup.cpp index e844315..9dec53a 100644 --- a/src/markdup/markdup.cpp +++ b/src/markdup/markdup.cpp @@ -153,6 +153,10 @@ int MarkDuplicates() { spdlog::info("{} duplicate reads found", dupIdxQue.Size()); spdlog::info("{} optical reads found", opticalIdxQue.Size()); // spdlog::info("{} represent reads found", repIdxQue.Size()); + // dupIdxQue.RealSize("na12878.dup"); + // opticalIdxQue.RealSize("normal.odup"); + + // return 0; uint64_t bamIdx = 0; DupInfo dupIdx = dupIdxQue.Pop(); diff --git a/src/markdup/md_types.h b/src/markdup/md_types.h index 0b653a9..7993773 100644 --- a/src/markdup/md_types.h +++ b/src/markdup/md_types.h @@ -272,16 +272,22 @@ struct DupIdxQueue { return len - popNum; } - uint64_t RealSize() { + uint64_t RealSize(const string fileName) { + if (this->Size() == 0) { + return 0; + } uint64_t len = 0; auto preTop = minHeap.top(); DupInfo dupIdx = this->Pop(); DupInfo nextDup = dupIdx; auto topIdx = minHeap.top(); - // ofstream ofs("n.dup"); ofstream ofs1("n-all.dup"); + ofstream ofs(fileName); // ofstream ofs1(filePrefix + ".odup"); while (dupIdx != -1) { + + ofs << dupIdx.idx << endl; // ofs1 << topIdx.arrId << '\t' << topIdx.arrIdx << '\t' << topIdx.dupIdx << endl; + ++len; while (true) { topIdx = minHeap.top(); @@ -295,14 +301,12 @@ struct DupIdxQueue { << endl; } } - - // ofs << topIdx.dupIdx << endl; ofs1 << topIdx.arrId << '\t' << topIdx.arrIdx << '\t' << topIdx.dupIdx << endl; - + dupIdx = nextDup; preTop = topIdx; } - // ofs.close(); ofs1.close(); - // cout << "RealSize: " << len << endl; + ofs.close(); // ofs1.close(); + cout << "RealSize: " << len << endl; return len; } }; \ No newline at end of file diff --git a/src/markdup/read_ends.h b/src/markdup/read_ends.h index d296ff3..866f165 100644 --- a/src/markdup/read_ends.h +++ b/src/markdup/read_ends.h @@ -147,12 +147,12 @@ struct ReadEnds : PhysicalLocation { comp = a.read2ReferenceIndex - b.read2ReferenceIndex; if (comp == 0) comp = a.read2Coordinate - b.read2Coordinate; - if (comp == 0) - comp = a.tile - b.tile; - if (comp == 0) - comp = a.x - b.x; // 由于picard的bug,用short类型来表示x,y,导致其可能为负数 - if (comp == 0) - comp - a.y - b.y; +// if (comp == 0) +// comp = a.tile - b.tile; +// if (comp == 0) +// comp = a.x - b.x; // 由于picard的bug,用short类型来表示x,y,导致其可能为负数 +// if (comp == 0) +// comp - a.y - b.y; if (comp == 0) comp = (int)(a.read1IndexInFile - b.read1IndexInFile); if (comp == 0) @@ -170,12 +170,12 @@ struct ReadEnds : PhysicalLocation { comp = a.read2Coordinate - b.read2Coordinate; if (comp == 0) // 这个放在坐标比较了之后,把坐标范围的放在之前,这样对分段数据块比较好处理 comp = a.orientation - b.orientation; - if (comp == 0) - comp = a.tile - b.tile; - if (comp == 0) - comp = a.x - b.x; // 由于picard的bug,用short类型来表示x,y,导致其可能为负数 - if (comp == 0) - comp - a.y - b.y; +// if (comp == 0) +// comp = a.tile - b.tile; +// if (comp == 0) +// comp = a.x - b.x; // 由于picard的bug,用short类型来表示x,y,导致其可能为负数 +// if (comp == 0) +// comp - a.y - b.y; if (comp == 0) comp = (int)(a.read1IndexInFile - b.read1IndexInFile); if (comp == 0)