/*
Description: The read-ends structure is mainly used for marking duplicates; it
             holds, among other things, physical information recorded while a
             sequence was being sequenced.

Copyright : All rights reserved by ICT

Author : Zhang Zhonghai
Date : 2023/11/3
*/
#ifndef READ_ENDS_H_
#define READ_ENDS_H_

#include <stdint.h>

/**
 * Small interface that provides access to the physical location information about a cluster.
 * All values should be defaulted to -1 if unavailable. ReadGroup and Tile should only allow
 * non-zero positive integers, x and y coordinates may be negative.
 */
struct PhysicalLocation {
    /**
     * Small class that provides access to the physical location information about a cluster.
     * All values should be defaulted to -1 if unavailable. Tile should only allow
     * non-zero positive integers, x and y coordinates must be non-negative.
     * This is different from PhysicalLocationShort in that the x and y positions are ints, not shorts
     * thus, they do not overflow within a HiSeqX tile.
     */
    int16_t tile = -1;
    int32_t x = -1;
    int32_t y = -1;
};

/* Holds all read-ends information, like ReadEndsForMarkDuplicates in Picard. */
struct ReadEnds : PhysicalLocation {
    /* Member variables taken from ReadEnds */
    /** Little struct-like class to hold read pair (and fragment) end data for duplicate marking. */
    static const int8_t F = 0, R = 1, FF = 2, FR = 3, RR = 4, RF = 5;

    // int16_t libraryId; // unused: multiple samples are not considered

    // Initialized to 0 (numerically equal to F) so that a default-initialized
    // object never carries an indeterminate value; this is the same value that
    // value-initialization already produced.
    int8_t orientation = 0;
    int32_t read1ReferenceIndex = -1;
    int32_t read1Coordinate = -1;
    int32_t read2ReferenceIndex = -1;
    // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)
    int32_t read2Coordinate = -1;

    /* Additional information used to detect optical dupes */
    // int16_t readGroup = -1; // a BAM file produced by alignment usually has a single read group, normal or tumor

    /** For optical duplicate detection the orientation matters regard to 1st or 2nd end of a mate */
    int8_t orientationForOpticalDuplicates = -1;
    /** A *transient* flag marking this read end as being an optical duplicate. */
    bool isOpticalDuplicate = false;

    /* Member variables taken from ReadEndsForMarkDuplicates */
    /** Little struct-like class to hold read pair (and fragment) end data for MarkDuplicatesWithMateCigar **/
    int16_t score = 0;
    int64_t read1IndexInFile = -1;
    int64_t read2IndexInFile = -1;
    int64_t duplicateSetSize = -1;

    /* Member variables taken from ReadEndsForMarkDuplicatesWithBarcodes (apparently unused) */
    // int32_t barcode = 0; // primary barcode for this read (and pair)
    // int32_t readOneBarcode = 0; // read one barcode, 0 if not present
    // int32_t readTwoBarcode = 0; // read two barcode, 0 if not present or not paired

    /* Member variables added by zzh */
    // Key generated from the position information, i.e.
    //   return (int64_t)tid << MAX_CONTIG_LEN_SHIFT | (int64_t)pos;
    // (MAX_CONTIG_LEN_SHIFT is not defined in this header.)
    int64_t posKey = -1;

    /**
     * Derive the overall pair orientation from the strands of the two
     * paired-end reads.
     *
     * @param read1NegativeStrand true if read 1 is on the negative strand
     * @param read2NegativeStrand true if read 2 is on the negative strand
     * @return one of FF, FR, RF, RR
     */
    static int8_t GetOrientationByte(bool read1NegativeStrand, bool read2NegativeStrand) {
        // Plain if/else (rather than a conditional expression) keeps the
        // in-class static constants from being odr-used.
        if (read1NegativeStrand) {
            if (read2NegativeStrand) {
                return RR;
            }
            return RF;
        }
        if (read2NegativeStrand) {
            return FR;
        }
        return FF;
    }

    /**
     * Check whether two read ends are the same for duplicate-marking purposes
     * (i.e. one of them is a duplicate of the other).
     *
     * Read 1 reference index, read 1 coordinate and orientation must always
     * match; read 2 reference index and coordinate are compared only when
     * compareRead2 is true.
     *
     * @param lhs          first read end (not modified)
     * @param rhs          second read end (not modified)
     * @param compareRead2 whether the read 2 fields take part in the comparison
     * @return true if all compared fields are equal
     */
    static bool AreComparableForDuplicates(const ReadEnds &lhs, const ReadEnds &rhs, bool compareRead2) {
        bool areComparable = lhs.read1ReferenceIndex == rhs.read1ReferenceIndex &&
                             lhs.read1Coordinate == rhs.read1Coordinate &&
                             lhs.orientation == rhs.orientation;
        if (areComparable && compareRead2) {
            areComparable = lhs.read2ReferenceIndex == rhs.read2ReferenceIndex &&
                            lhs.read2Coordinate == rhs.read2Coordinate;
        }
        return areComparable;
    }

    /** Whether the orientation is forward, i.e. orientation == F. */
    bool IsForwardStrand() const { return orientation == F; }

    /** Whether this is a pair with read 2 present, i.e. read2ReferenceIndex != -1. */
    bool IsPaired() const { return read2ReferenceIndex != -1; }
};

#endif