picard_cpp/src/sam/utils/read_ends.h

115 lines
4.2 KiB
C
Raw Normal View History

/*
Description: read ends
Copyright : All rights reserved by ICT
Author : Zhang Zhonghai
Date : 2023/11/3
*/
#ifndef READ_ENDS_H_
#define READ_ENDS_H_
#include <stdint.h>
/**
 * Physical location information about a cluster: a tile plus x/y coordinates.
 *
 * Every field defaults to -1, which stands for "unavailable".
 * A valid tile is expected to be a non-zero positive integer; nothing in this struct enforces that.
 * x and y are 32-bit ints rather than shorts so that they do not overflow within a HiSeqX tile.
 *
 * NOTE(review): the notes this struct was derived from disagree on whether x/y may be
 * negative for a valid location -- confirm against the callers.
 */
struct PhysicalLocation
{
    int16_t tile{-1}; // tile number, -1 when unavailable
    int32_t x{-1};    // x coordinate, -1 when unavailable
    int32_t y{-1};    // y coordinate, -1 when unavailable
};
/* Holds all read-ends information, like ReadEndsForMarkDuplicates in Picard. */
struct ReadEnds : PhysicalLocation
{
    /* ---- Member variables from ReadEnds ---- */
    /** Little struct-like class to hold read pair (and fragment) end data for duplicate marking. */
    static const int8_t F = 0, R = 1, FF = 2, FR = 3, RR = 4, RF = 5;
    // int16_t libraryId; // unused: multi-sample input is not considered
    /**
     * One of F, R, FF, FR, RR, RF.
     * Defaults to F (0), the value value-initialised objects already received, so that a
     * default-initialised object never carries an indeterminate value.
     */
    int8_t orientation = F;
    int32_t read1ReferenceIndex = -1;
    int32_t read1Coordinate = -1;
    int32_t read2ReferenceIndex = -1;
    int32_t read2Coordinate = -1; // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)
    /* Additional information used to detect optical dupes */
    // int16_t readGroup = -1; // omitted: an aligned BAM file usually has a single read group (normal or tumor)
    /** For optical duplicate detection the orientation matters regard to 1st or 2nd end of a mate */
    int8_t orientationForOpticalDuplicates = -1;
    /** A *transient* flag marking this read end as being an optical duplicate. */
    bool isOpticalDuplicate = false;

    /* ---- Member variables from ReadEndsForMarkDuplicates ---- */
    /** Little struct-like class to hold read pair (and fragment) end data for MarkDuplicatesWithMateCigar **/
    int16_t score = 0;
    int64_t read1IndexInFile = -1;
    int64_t read2IndexInFile = -1;
    int64_t duplicateSetSize = -1;

    /* ---- Member variables from ReadEndsForMarkDuplicatesWithBarcodes (apparently not needed) ---- */
    // int32_t barcode = 0; // primary barcode for this read (and pair)
    // int32_t readOneBarcode = 0; // read one barcode, 0 if not present
    // int32_t readTwoBarcode = 0; // read two barcode, 0 if not present or not paired

    /* ---- Member variables added by zzh ---- */
    int64_t posKey = -1; // key generated from the position: (int64_t)tid << MAX_CONTIG_LEN_SHIFT | (int64_t)pos

    /**
     * Derive the orientation of a pair from the strand of each of its reads.
     *
     * @param read1NegativeStrand true if read 1 is on the negative strand
     * @param read2NegativeStrand true if read 2 is on the negative strand
     * @return RR, RF, FR or FF (first letter for read 1, second for read 2)
     */
    static int8_t GetOrientationByte(bool read1NegativeStrand, bool read2NegativeStrand)
    {
        if (read1NegativeStrand)
        {
            return read2NegativeStrand ? RR : RF;
        }
        return read2NegativeStrand ? FR : FF;
    }

    /**
     * Check whether two read ends sit at the same position with the same orientation.
     *
     * Read 1 reference index, read 1 coordinate and orientation are always compared;
     * read 2 reference index and coordinate are compared only when compareRead2 is true.
     *
     * @param lhs          first read ends (not modified)
     * @param rhs          second read ends (not modified)
     * @param compareRead2 also require the read 2 position to match
     * @return true if all compared fields are equal
     */
    static bool AreComparableForDuplicates(const ReadEnds &lhs, const ReadEnds &rhs, bool compareRead2)
    {
        bool areComparable = lhs.read1ReferenceIndex == rhs.read1ReferenceIndex &&
                             lhs.read1Coordinate == rhs.read1Coordinate &&
                             lhs.orientation == rhs.orientation;
        if (areComparable && compareRead2)
        {
            areComparable = lhs.read2ReferenceIndex == rhs.read2ReferenceIndex &&
                            lhs.read2Coordinate == rhs.read2Coordinate;
        }
        return areComparable;
    }

    /** @return true if orientation equals F. */
    bool IsForwardStrand() const
    {
        return orientation == F;
    }

    /** @return true if read 2 has a reference index, i.e. read2ReferenceIndex is not -1. */
    bool IsPaired() const
    {
        return read2ReferenceIndex != -1;
    }
};
#endif