picard_cpp/src/sam/utils/read_ends.h

115 lines
4.2 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*
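Description: The ReadEnds struct is mainly used for marking duplicates; it holds, among other things, physical information from the sequencing process of the reads.
Copyright : All rights reserved by ICT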
Author : Zhang Zhonghai
Date : 2023/11/3
*/
#ifndef READ_ENDS_H_
#define READ_ENDS_H_
#include <stdint.h>
/**
* Small interface that provides access to the physical location information about a cluster.
* All values should be defaulted to -1 if unavailable. ReadGroup and Tile should only allow
* non-zero positive integers, x and y coordinates may be negative.
*/
struct PhysicalLocation
{
    /**
     * Plain data holder for the physical location of a cluster: a tile number
     * plus x and y coordinates. Every field starts out as -1, the value used
     * here to mean "unavailable".
     *
     * In contrast to Picard's PhysicalLocationShort, x and y are kept as 32-bit
     * integers instead of shorts, so they do not overflow within a HiSeqX tile.
     */
    int16_t tile{-1}; // tile number; -1 when unavailable
    int32_t x{-1};    // x coordinate; -1 when unavailable
    int32_t y{-1};    // y coordinate; -1 when unavailable
};
/* Holds all read-ends information, analogous to ReadEndsForMarkDuplicates in Picard. */
struct ReadEnds : PhysicalLocation
{
    /* ---- Member variables taken from Picard's ReadEnds ---- */
    /** Little struct-like class to hold read pair (and fragment) end data for duplicate marking. */

    /* Orientation codes: single reads (F, R) and read pairs (FF, FR, RR, RF). */
    static const int8_t F = 0, R = 1, FF = 2, FR = 3, RR = 4, RF = 5;

    // int16_t libraryId; // unused: multiple samples are not considered

    /*
     * One of the orientation codes above. Defaults to 0 (the value of F) so
     * that a default-initialized object holds the same value a
     * value-initialized one already did, instead of an indeterminate byte.
     */
    int8_t orientation = 0;
    int32_t read1ReferenceIndex = -1;
    int32_t read1Coordinate = -1;
    int32_t read2ReferenceIndex = -1;
    int32_t read2Coordinate = -1; // This field is overloaded for flow based processing as the end coordinate of read 1. (paired reads not supported)

    /* Additional information used to detect optical dupes */
    // int16_t readGroup = -1; // an aligned BAM file usually has a single read group (normal or tumor)
    /** For optical duplicate detection the orientation matters regard to 1st or 2nd end of a mate */
    int8_t orientationForOpticalDuplicates = -1;
    /** A *transient* flag marking this read end as being an optical duplicate. */
    bool isOpticalDuplicate = false;

    /* ---- Member variables taken from Picard's ReadEndsForMarkDuplicates ---- */
    /** Little struct-like class to hold read pair (and fragment) end data for MarkDuplicatesWithMateCigar **/
    int16_t score = 0;
    int64_t read1IndexInFile = -1;
    int64_t read2IndexInFile = -1;
    int64_t duplicateSetSize = -1;

    /* ---- Member variables from ReadEndsForMarkDuplicatesWithBarcodes (apparently not needed) ---- */
    // int32_t barcode = 0; // primary barcode for this read (and pair)
    // int32_t readOneBarcode = 0; // read one barcode, 0 if not present
    // int32_t readTwoBarcode = 0; // read two barcode, 0 if not present or not paired

    /* ---- Member variables added by zzh ---- */
    /*
     * Key generated from the position information. Per the original note it is
     * built as: (int64_t)tid << MAX_CONTIG_LEN_SHIFT | (int64_t)pos
     * (that computation is not part of this struct).
     */
    int64_t posKey = -1;

    /**
     * Derive the pair orientation code from the strands of both reads.
     *
     * @param read1NegativeStrand true if read 1 is on the negative strand
     * @param read2NegativeStrand true if read 2 is on the negative strand
     * @return RR, RF, FR or FF; the first letter is read 1 and the second is
     *         read 2, with R for negative strand and F otherwise
     */
    static int8_t GetOrientationByte(bool read1NegativeStrand, bool read2NegativeStrand)
    {
        // Plain returns (not ?:) so the static const members are only read by value.
        if (read1NegativeStrand)
        {
            if (read2NegativeStrand)
            {
                return RR;
            }
            else
            {
                return RF;
            }
        }
        else
        {
            if (read2NegativeStrand)
            {
                return FR;
            }
            else
            {
                return FF;
            }
        }
    }

    /**
     * Check whether two read ends are comparable as duplicates of each other.
     *
     * @param lhs          first read ends (only read, never modified)
     * @param rhs          second read ends (only read, never modified)
     * @param compareRead2 when true, read 2's reference index and coordinate
     *                     must match as well
     * @return true if read 1's reference index, read 1's coordinate and the
     *         orientation are equal and, when requested, read 2 matches too
     */
    static bool AreComparableForDuplicates(const ReadEnds &lhs, const ReadEnds &rhs, bool compareRead2)
    {
        const bool read1Matches = lhs.read1ReferenceIndex == rhs.read1ReferenceIndex &&
                                  lhs.read1Coordinate == rhs.read1Coordinate &&
                                  lhs.orientation == rhs.orientation;
        if (!read1Matches || !compareRead2)
        {
            return read1Matches;
        }
        return lhs.read2ReferenceIndex == rhs.read2ReferenceIndex &&
               lhs.read2Coordinate == rhs.read2Coordinate;
    }

    /** @return true if the orientation is F (forward strand). */
    bool IsForwardStrand() const
    {
        return orientation == F;
    }

    /**
     * @return true if a reference index has been recorded for read 2
     *         (read2ReferenceIndex differs from its -1 default); the original
     *         note describes this as the pair being properly aligned.
     */
    bool IsPaired() const
    {
        return read2ReferenceIndex != -1;
    }
};
#endif