115 lines
4.2 KiB
C
115 lines
4.2 KiB
C
|
|
/*
|
|||
|
|
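Description: The read-ends structs are mainly used for marking duplicates; they hold, among other things, physical information about a read from the sequencing process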
|
|||
|
|
|
|||
|
|
Copyright : All rights reserved by ICT
|
|||
|
|
|
|||
|
|
Author : Zhang Zhonghai
|
|||
|
|
Date : 2023/11/3
|
|||
|
|
*/
|
|||
|
|
|
|||
|
|
#ifndef READ_ENDS_H_
|
|||
|
|
#define READ_ENDS_H_
|
|||
|
|
|
|||
|
|
#include <stdint.h>
|
|||
|
|
|
|||
|
|
/**
|
|||
|
|
* Small interface that provides access to the physical location information about a cluster.
|
|||
|
|
* All values should be defaulted to -1 if unavailable. ReadGroup and Tile should only allow
|
|||
|
|
* non-zero positive integers, x and y coordinates may be negative.
|
|||
|
|
*/
|
|||
|
|
struct PhysicalLocation
{
    /**
     * Physical location of a cluster, stored as plain data.
     *
     * Each field defaults to -1, which stands for "unavailable". A valid tile
     * is a non-zero positive integer, and valid x and y coordinates are
     * non-negative.
     *
     * Unlike PhysicalLocationShort, x and y are 32-bit rather than 16-bit
     * integers, so they do not overflow within a HiSeqX tile.
     */
    int16_t tile{-1}; // tile number, -1 when unavailable
    int32_t x{-1};    // x coordinate, -1 when unavailable
    int32_t y{-1};    // y coordinate, -1 when unavailable
};
|
|||
|
|
|
|||
|
|
/* Holds all read-ends information, like ReadEndsForMarkDuplicates in Picard */
|
|||
|
|
struct ReadEnds : PhysicalLocation
|
|||
|
|
{
|
|||
|
|
/* ReadEnds中的成员变量 */
|
|||
|
|
/** Little struct-like class to hold read pair (and fragment) end data for duplicate marking. */
|
|||
|
|
static const int8_t F = 0, R = 1, FF = 2, FR = 3, RR = 4, RF = 5;
|
|||
|
|
// int16_t libraryId; // 没用,不考虑多样本
|
|||
|
|
int8_t orientation;
|
|||
|
|
int32_t read1ReferenceIndex = -1;
|
|||
|
|
int32_t read1Coordinate = -1;
|
|||
|
|
int32_t read2ReferenceIndex = -1;
|
|||
|
|
int32_t read2Coordinate = -1; // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)
|
|||
|
|
/* Additional information used to detect optical dupes */
|
|||
|
|
// int16_t readGroup = -1; 一般经过比对后的bam文件只有一个read group,normal或者tumor
|
|||
|
|
/** For optical duplicate detection the orientation matters regard to 1st or 2nd end of a mate */
|
|||
|
|
int8_t orientationForOpticalDuplicates = -1;
|
|||
|
|
/** A *transient* flag marking this read end as being an optical duplicate. */
|
|||
|
|
bool isOpticalDuplicate = false;
|
|||
|
|
|
|||
|
|
/* ReadEndsForMarkDuplicates中的成员变量 */
|
|||
|
|
/** Little struct-like class to hold read pair (and fragment) end data for MarkDuplicatesWithMateCigar **/
|
|||
|
|
int16_t score = 0;
|
|||
|
|
int64_t read1IndexInFile = -1;
|
|||
|
|
int64_t read2IndexInFile = -1;
|
|||
|
|
int64_t duplicateSetSize = -1;
|
|||
|
|
|
|||
|
|
/* ReadEndsForMarkDuplicatesWithBarcodes中的成员变量 (好像用不到) */
|
|||
|
|
// int32_t barcode = 0; // primary barcode for this read (and pair)
|
|||
|
|
// int32_t readOneBarcode = 0; // read one barcode, 0 if not present
|
|||
|
|
// int32_t readTwoBarcode = 0; // read two barcode, 0 if not present or not paired
|
|||
|
|
|
|||
|
|
/* zzh增加的成员变量 */
|
|||
|
|
int64_t posKey = -1; // 根据位置信息生成的关键字 return (int64_t)tid << MAX_CONTIG_LEN_SHIFT | (int64_t)pos;
|
|||
|
|
|
|||
|
|
/* 根据pairend read的比对方向,来确定整体的比对方向 */
|
|||
|
|
static int8_t GetOrientationByte(bool read1NegativeStrand, bool read2NegativeStrand)
|
|||
|
|
{
|
|||
|
|
if (read1NegativeStrand)
|
|||
|
|
{
|
|||
|
|
if (read2NegativeStrand)
|
|||
|
|
return RR;
|
|||
|
|
else
|
|||
|
|
return RF;
|
|||
|
|
}
|
|||
|
|
else
|
|||
|
|
{
|
|||
|
|
if (read2NegativeStrand)
|
|||
|
|
return FR;
|
|||
|
|
else
|
|||
|
|
return FF;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 比较两个readends是否一样(有个冗余) */
|
|||
|
|
static bool AreComparableForDuplicates(ReadEnds &lhs, ReadEnds &rhs, bool compareRead2)
|
|||
|
|
{
|
|||
|
|
bool areComparable = true;
|
|||
|
|
areComparable = lhs.read1ReferenceIndex == rhs.read1ReferenceIndex &&
|
|||
|
|
lhs.read1Coordinate == rhs.read1Coordinate &&
|
|||
|
|
lhs.orientation == rhs.orientation;
|
|||
|
|
if (areComparable && compareRead2)
|
|||
|
|
{
|
|||
|
|
areComparable = lhs.read2ReferenceIndex == rhs.read2ReferenceIndex &&
|
|||
|
|
lhs.read2Coordinate == rhs.read2Coordinate;
|
|||
|
|
}
|
|||
|
|
return areComparable;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* 比对方向是否正向 */
|
|||
|
|
bool IsForwardStrand()
|
|||
|
|
{
|
|||
|
|
return orientation == F;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* pairend是否合适的比对上了 */
|
|||
|
|
bool IsPaired()
|
|||
|
|
{
|
|||
|
|
return read2ReferenceIndex != -1;
|
|||
|
|
}
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
#endif
|