有个linux cm, 线程A接收数据落地,线程B去遍历目录,再处理这些数据文件。linux中一般是这个套路吧。
当A接收的数据速率较高时,B吃不消了,导致数据文件堆积严重。
问题出在IO操作太费时间。就想在内存中捣鼓,不落地。
这个设想中的内存list, 因为要求高效率,尽可能不用锁。
去找无锁队列的资料,原理说的都挺简洁的,如果要写个强壮的无锁队列,心里没谱。
看到shines77大神在github上已经写完一个好用的无锁队列,断续的学习了几天。
今天将shines77的工程代码整理了一个demo, 明天丢在工程里,配上业务逻辑用用。
shines77的工程已经严谨的用多线程测试过了,我只整理了压入弹出的接口的用法。在实际工程中再详细测试。
shines77的工程是用模板写的,我要丢进去的工程,有个同事也在维护,他java是主业,C用的不细,要求不能用模板实现,我就将shines77的代码改回C实现。
用模板写是灵活,对于队列,压入的都是结构指针。只要压入的那个指针类搞成模板就行。队列本身不用模板来做,是可以的。
我怕同事喷我,那个要压入的数据类,也用C写。代码中见不得模板:)
其实,模板用起来灵活,调试起来方便,看着也清晰,为啥不试着用一下呢? 多见几次面,看着就舒服了。
环形队列的条目数,要从配置文件中来,将构造函数改了下,不用宏了,将条目数传进构造函数。
shines77原工程各个平台都能编译运行,我只要linux和win下能跑. 跨平台的代码不用动。
ring_queue_ls_2017_1219_0228.7z
linux和win下编译,运行通过。
--------------------------------------------------------------------------------
test_ring_queue_ls 1.0.0.1 2017-12-18 10:51
--------------------------------------------------------------------------------
argv[0] = ./ring_queue_ls
my_ringQueue.capacity() = 1048576
m_i_len_data = 32
74 68 69 73 20 69 73 20
69 74 65 6D 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
spin2_push
spin2_pop
m_i_len_data = 32
74 68 69 73 20 69 73 20
69 74 65 6D 00 00 00 00
00 00 00 00 00 00 00 00
00 00 00 00 00 00 00 00
END
root@debian750devmin:/home/dev/linux#
// @file main.cpp
// @note
// set date on linux env : date -s "2017-10-29 12:08:00"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include "helper_system.h"
#include "ring_queue.h"
#define PROG_NAME "test_ring_queue_ls"
#define PROG_VER "1.0.0.1"
#define PROG_MODIFY_TIME "2017-12-18 10:51"
#define LINE80 "--------------------------------------------------------------------------------"
void version();
void help();
// Demo driver: build one item, push it through the ring queue, pop it back.
int main(int argc, char** argv)
{
    os_cmd_clear_screen();
    version();
    show_cmdline(argc, argv);
    // class_ring_queue arg 3: CAPACITY_POW2_BITS; could come from a config file.
    class_ring_queue my_ringQueue(true, true, CAPACITY_POW2_BITS); // 2^20 = 1048576 entries
    if (!my_ringQueue.is_valid()) {
        printf("maybe ring queue unit numbers too large, please make small ring_queue\n");
    }
    class_ring_queue_item* p_item = NULL;
    class_ring_queue_item* p_item_was_pop = NULL;
    unsigned int ui_list_capacity = my_ringQueue.capacity();
    printf("my_ringQueue.capacity() = %ld\n", (long)ui_list_capacity);
    // Only demonstrates the two interfaces (push, pop); multithreaded
    // stress-testing is done in the real project.
    do {
        // Bail out if the ring's backing array could not be allocated.
        if (!my_ringQueue.is_valid()) {
            break;
        }
        // FIX: operator new throws std::bad_alloc instead of returning NULL,
        // so the old `if (NULL != p_item)` check was dead code. A single
        // object (new/delete) replaces the pointless one-element array
        // (new[1]/delete[]).
        p_item = new class_ring_queue_item;
        p_item->clear();
        p_item->set_data(new unsigned char[0x20], 0x20); // item owns this buffer
        strcpy((char*)p_item->data(), "this is item");
        p_item->show();
        // ring queue push
        printf("spin2_push\n");
        // FIX: check the push result; on failure (queue full) the queue does
        // not own the item, so delete it here to avoid a leak.
        if (0 != my_ringQueue.spin2_push(p_item)) {
            printf("spin2_push failed, queue is full\n");
            delete p_item;
            p_item = NULL;
            break;
        }
        // ring queue pop
        printf("spin2_pop\n");
        p_item_was_pop = my_ringQueue.spin2_pop();
        if (NULL != p_item_was_pop) {
            p_item_was_pop->show();
            delete p_item_was_pop; // matches `new class_ring_queue_item` above
            p_item_was_pop = NULL;
        }
    } while (0);
    printf("END\n");
    return EXIT_SUCCESS;
}
// Print the program banner: name, version and build stamp, framed by rules.
void version()
{
    printf("%s\n%s %s %s\n%s\n",
           LINE80,
           PROG_NAME, PROG_VER, PROG_MODIFY_TIME,
           LINE80);
}
// Print a minimal usage message, framed by rules.
void help()
{
    printf("%s\n", LINE80);
    printf("usage:\n%s\n", PROG_NAME);
    printf("%s\n", LINE80);
}
// @file ring_queue.h
#ifndef __RING_QUEUE_H_V1__
#define __RING_QUEUE_H_V1__
// #ifdef linux
#include "ring_queue_define.h"
#include "ring_queue_item.h"
// Concrete ring queue: the base class manages head/tail indices and the
// locks; this class allocates and owns the heap array of item slots.
class class_ring_queue : public class_ring_queue_base
{
public:
    // bFillQueue: zero-fill the slot array after allocation (see init_queue)
    // bInitHead:  head/tail zeroed via memset (true) or member stores (false)
    // i_capacity_pow2_bits: capacity = 2^i_capacity_pow2_bits, rounded up to
    //                       at least 2^CAPACITY_POW2_BITS_MIN
    class_ring_queue(bool bFillQueue, bool bInitHead, int i_capacity_pow2_bits);
    ~class_ring_queue();
public:
    // Print the current head/tail indices.
    virtual void show();
protected:
    // Allocate the slot array; on failure is_valid() stays false.
    void init_queue(bool bFillQueue = true);
};
// #endif // #ifdef linux
#endif // #ifndef __RING_QUEUE_H_V1__
// @file ring_queue_item.h
#ifndef __RING_QUEUE_ITEM_H_V1__
#define __RING_QUEUE_ITEM_H_V1__
// #ifdef linux
#include
#include
#include
#include
#include "port.h"
#include "sleep.h"
#ifdef WIN32
#include "msvc/pthread.h"
#else
#include
#endif // #ifdef WIN32
#include "ring_queue_define.h"
// Spin lock state. The padding fields keep `locked` and the bookkeeping
// counters on separate cache lines (CACHE_LINE_SIZE) so that spinning on
// `locked` does not cause false sharing with the other fields.
struct spin_mutex_t
{
    volatile char padding1[CACHE_LINE_SIZE];
    volatile uint32_t locked;          // 0 = unlocked, 1 = locked (set via atomic exchange/CAS)
    volatile char padding2[CACHE_LINE_SIZE - 1 * sizeof(uint32_t)];
    volatile uint32_t spin_counter;    // initialized from MUTEX_MAX_SPIN_COUNT
    volatile uint32_t recurse_counter;
    volatile uint32_t thread_id;
    volatile uint32_t reserve;
    volatile char padding3[CACHE_LINE_SIZE - 4 * sizeof(uint32_t)];
};
// Ring indices. `head` is advanced by push, `tail` by pop; padding places
// each index on its own cache line (JIMI_CACHELINE_SIZE) to avoid false
// sharing between producer and consumer.
struct RingQueueHead
{
    volatile uint32_t head;
    char padding1[JIMI_CACHELINE_SIZE - sizeof(uint32_t)];
    volatile uint32_t tail;
    char padding2[JIMI_CACHELINE_SIZE - sizeof(uint32_t)];
};
// Payload element stored (by pointer) in the ring queue: a raw byte buffer
// plus its length. The item OWNS the buffer passed to set_data() and frees
// it with delete[] in clear()/destructor.
class class_ring_queue_item {
public:
    class_ring_queue_item();
    virtual ~class_ring_queue_item();
    // Take ownership of p_data (must come from new unsigned char[...]).
    void set_data(unsigned char* p_data, int i_len_data);
    // Raw buffer pointer (may be NULL when empty).
    unsigned char* data();
    // Buffer length in bytes.
    int length();
    // Free the owned buffer and reset to the empty state.
    void clear();
    // Hex-dump the buffer, 8 bytes per line.
    void show();
private:
    unsigned char* m_p_data;   // owned buffer, freed with delete[]
    int m_i_len_data;          // length of m_p_data in bytes
};
class RingQueueCore
{
public:
typedef class_ring_queue_item* item_type;
RingQueueCore()
{
m_p_queue = NULL;
}
virtual ~RingQueueCore() {}
public:
static const bool kIsAllocOnHeap = true;
bool is_valid() { return (NULL != m_p_queue); }
public:
RingQueueHead m_ring_queue_head;
item_type* m_p_queue;
};
// Queue logic shared by all ring queues: capacity/mask bookkeeping and the
// spin-lock protected push/pop operations. Holds both a spin mutex (used by
// the spin2_* methods) and a pthread mutex (initialized/destroyed but not
// used by any method visible here).
class class_ring_queue_base
{
public:
    typedef uint32_t size_type;
    typedef uint32_t index_type;
    typedef RingQueueCore core_type;
    typedef class_ring_queue_item* value_type;
public:
    size_type m_nCapacity;  // total slots; always a power of two
    index_type m_nMask;     // m_nCapacity - 1, used for index wrapping
public:
    class_ring_queue_base(bool bInitHead, int i_capacity_pow2_bits);
    ~class_ring_queue_base();
public:
    virtual void show();
    // True when the slot array was allocated successfully.
    bool is_valid() { return m_ring_queue_core.is_valid(); }
    index_type mask() const { return m_nMask; };
    size_type capacity() const { return m_nCapacity; };
    size_type length() const { return sizes(); };
    // Unsynchronized snapshot of the number of queued items.
    size_type sizes() const;
    void init(bool bInitHead, int i_capacity_pow2_bits);
    // Returns 0 on success, -1 when full; on failure the caller keeps item.
    int spin2_push(class_ring_queue_item* item);
    // Returns NULL when the queue is empty.
    class_ring_queue_item* spin2_pop();
    // Reference variant of spin2_push kept for study (plain-store unlock).
    int spin2_push_(class_ring_queue_item* item);
protected:
    core_type m_ring_queue_core;
    spin_mutex_t spin_mutex;      // guards head/tail and the slot array
    pthread_mutex_t queue_mutex;  // initialized in init(), destroyed in dtor
};
// #endif // #ifdef linux
#endif // #ifndef __RING_QUEUE_ITEM_H_V1__
// @file ring_queue_define.h
#ifndef __RING_QUEUE_DEFINE_H_V1__
#define __RING_QUEUE_DEFINE_H_V1__
// #ifdef linux
/// CPU cache line size (64 bytes on x86).
#define CACHE_LINE_SIZE 64
/// FIX: the guard used to test JIMI_CACHE_LINE_SIZE while defining
/// JIMI_CACHELINE_SIZE, so it never guarded anything; test the same macro.
#ifndef JIMI_CACHELINE_SIZE
#define JIMI_CACHELINE_SIZE 64
#endif
/// RingQueue capacity (2^CAPACITY_POW2_BITS; must be a power of two) and mask.
#define CAPACITY_POW2_BITS 20 // 2^CAPACITY_POW2_BITS = 1048576
#define CAPACITY_POW2_BITS_MIN 8 // 2^8 = 256; smaller requests are rounded up to this
/// Do not modify: qmask = qsize - 1.
/// NOTE(review): QSIZE is not defined in this header, so QMASK only expands
/// correctly where QSIZE is defined first — confirm it is still referenced.
#define QMASK (QSIZE - 1)
///
/// Whether spin_mutex maintains spin_counter: 0 = no (faster, recommended), 1 = yes.
///
#define USE_SPIN_MUTEX_COUNTER 0
///
/// Max spin count of spin_mutex; default is 16, but 0/1/2 are recommended
/// (faster). 0 is equivalent to USE_SPIN_MUTEX_COUNTER = 0.
///
#define MUTEX_MAX_SPIN_COUNT 1
#define SPIN_YIELD_THRESHOLD 1
// #endif // #ifdef linux
#endif // #ifndef __RING_QUEUE_DEFINE_H_V1__
// @file ring_queue.cpp
#include
#include "ring_queue.h"
// #ifdef linux
// The base constructor sets capacity/mask and the locks; this constructor
// then allocates the slot array.
class_ring_queue::class_ring_queue(bool bFillQueue, bool bInitHead, int i_capacity_pow2_bits)
    : class_ring_queue_base(bInitHead, i_capacity_pow2_bits)
{
    init_queue(bFillQueue);
}
class_ring_queue::~class_ring_queue()
{
    // If the queue slot array was allocated on the heap, release it.
    // FIX: the NULL guard around delete[] was redundant — delete[] on a
    // NULL pointer is a well-defined no-op.
    if (RingQueueCore::kIsAllocOnHeap) {
        delete[] this->m_ring_queue_core.m_p_queue;
        this->m_ring_queue_core.m_p_queue = NULL;
    }
}
// Allocate the array of item-pointer slots (one per queue entry).
// On failure m_p_queue stays NULL, so is_valid() reports the problem.
void class_ring_queue::init_queue(bool bFillQueue /* = true */)
{
    // FIX: plain operator new[] throws std::bad_alloc instead of returning
    // NULL, so the original `newData != NULL` test was dead code. Catch the
    // failure explicitly to preserve the intended "is_valid() goes false"
    // contract.
    value_type *newData = NULL;
    try {
        newData = new class_ring_queue_item*[capacity()];
    }
    catch (...) {
        newData = NULL;
    }
    if (newData != NULL) {
        if (bFillQueue) {
            // Zero every slot so unused entries are NULL pointers.
            memset((void *)newData, 0, sizeof(value_type) * capacity());
        }
        this->m_ring_queue_core.m_p_queue = newData;
    }
}
// Dump the current producer/consumer indices of the ring.
void class_ring_queue::show()
{
    printf("class_ring_queue: (head = %u, tail = %u)\n",
           this->m_ring_queue_core.m_ring_queue_head.head,
           this->m_ring_queue_core.m_ring_queue_head.tail);
}
// #endif // #ifdef linux
// @file ring_queue_item.cpp
#include
#include "ring_queue_item.h"
// #ifdef linux
// --------------------------------------------------------------------------------
// class_ring_queue_item
// --------------------------------------------------------------------------------
// Construct an empty item: no payload buffer, zero length.
class_ring_queue_item::class_ring_queue_item()
{
    m_p_data = NULL;
    m_i_len_data = 0;
}
class_ring_queue_item::~class_ring_queue_item()
{
    // Frees the owned payload buffer (if any) via clear().
    clear();
}
// Release the owned payload buffer and reset the item to the empty state.
void class_ring_queue_item::clear()
{
    // delete[] on a NULL pointer is a no-op, so no guard is needed.
    delete[] m_p_data;
    set_data(NULL, 0);
}
// Attach a payload buffer to this item. The item takes ownership: the
// buffer must come from new unsigned char[...], because clear() and the
// destructor release it with delete[]. Any previous buffer is NOT freed
// here — call clear() first if one is attached.
void class_ring_queue_item::set_data(unsigned char* p_data, int i_len_data)
{
    m_p_data = p_data;
    m_i_len_data = i_len_data;
}
// Raw payload pointer; NULL when the item is empty. Ownership stays with
// the item.
unsigned char* class_ring_queue_item::data()
{
    return m_p_data;
}
// Payload length in bytes, as passed to set_data().
int class_ring_queue_item::length()
{
    return m_i_len_data;
}
// Hex-dump the payload buffer, 8 bytes per output line.
void class_ring_queue_item::show()
{
    printf("m_i_len_data = %d\n", m_i_len_data);
    for (int idx = 0; idx < m_i_len_data; idx++) {
        // Break the line before every 8th byte (but not before the first).
        if ((idx != 0) && (idx % 8 == 0)) {
            printf("\n");
        }
        printf("%2.2X ", m_p_data[idx]);
    }
    printf("\n");
}
// --------------------------------------------------------------------------------
// class_ring_queue_base
// --------------------------------------------------------------------------------
class_ring_queue_base::class_ring_queue_base(bool bInitHead, int i_capacity_pow2_bits)
{
    // All real setup (capacity, mask, head/tail, both locks) lives in init().
    init(bInitHead, i_capacity_pow2_bits);
}
class_ring_queue_base::~class_ring_queue_base()
{
    // Ensure prior writes are not reordered past this point, then drop the
    // spin-lock flag and destroy the pthread mutex created in init().
    Jimi_WriteCompilerBarrier();
    spin_mutex.locked = 0;
    pthread_mutex_destroy(&queue_mutex);
}
// One-time setup: capacity/mask, head/tail indices, spin lock and mutex.
void class_ring_queue_base::init(bool bInitHead, int i_capacity_pow2_bits)
{
    // Round the requested capacity up to a power of two, never below
    // 2^CAPACITY_POW2_BITS_MIN.
    m_nCapacity = (size_type)JIMI_MAX(JIMI_ROUND_TO_POW2((1 << i_capacity_pow2_bits)), JIMI_ROUND_TO_POW2((1 << CAPACITY_POW2_BITS_MIN)));
    m_nMask = (index_type)(m_nCapacity - 1);
    // NOTE(review): both branches zero head and tail — the bInitHead flag
    // only selects member-by-member stores vs. a memset of the whole struct
    // (which also clears the padding). Confirm the inverted-looking
    // condition is intentional.
    if (!bInitHead) {
        m_ring_queue_core.m_ring_queue_head.head = 0;
        m_ring_queue_core.m_ring_queue_head.tail = 0;
    }
    else {
        memset((void *)&m_ring_queue_core.m_ring_queue_head, 0, sizeof(m_ring_queue_core.m_ring_queue_head));
    }
    Jimi_CompilerBarrier();
    // Initialize the spin mutex to the unlocked state.
    spin_mutex.locked = 0;
    spin_mutex.spin_counter = MUTEX_MAX_SPIN_COUNT;
    spin_mutex.recurse_counter = 0;
    spin_mutex.thread_id = 0;
    spin_mutex.reserve = 0;
    // Initialize the (currently unused elsewhere in this file) pthread mutex.
    pthread_mutex_init(&queue_mutex, NULL);
}
// Dump the current producer/consumer indices.
void class_ring_queue_base::show()
{
    printf("class_ring_queue_base: (head = %u, tail = %u)\n",
           m_ring_queue_core.m_ring_queue_head.head,
           m_ring_queue_core.m_ring_queue_head.tail);
}
// Unsynchronized snapshot of the number of queued items.
// Returns (size_type)-1 when the head/tail snapshot is inconsistent
// (torn read while a push/pop is in flight).
class_ring_queue_base::size_type
class_ring_queue_base::sizes() const
{
    index_type head = 0;
    index_type tail = 0;
    Jimi_CompilerBarrier();
    head = m_ring_queue_core.m_ring_queue_head.head;
    tail = m_ring_queue_core.m_ring_queue_head.tail;
    // FIX: spin2_push() admits an item while (head - tail) <= mask(), so a
    // full queue holds exactly capacity() == mask() + 1 items. The original
    // test used `<= mask()`, which made a completely full queue report
    // (size_type)-1 instead of capacity().
    size_type used = (size_type)(head - tail);
    if (used <= capacity()) {
        return used;
    }
    return (size_type)-1;
}
// Reference spin-lock push (plain-store unlock variant, kept for study).
// Returns 0 on success, -1 when the queue is full — in that case the
// caller keeps ownership of `item`.
int class_ring_queue_base::spin2_push_(class_ring_queue_item* item)
{
    index_type head, tail, next;
    int32_t pause_cnt;
    uint32_t loop_count, yield_cnt, spin_count;
    static const uint32_t YIELD_THRESHOLD = 1; // spin rounds before starting to yield/sleep
    Jimi_CompilerBarrier(); // compiler read/write barrier
    // Small trick borrowed from pthread_spin_lock(): try an atomic exchange
    // first; only fall into the backoff loop if the lock was already held.
    if (jimi_lock_test_and_set32(&spin_mutex.locked, 1U) != 0U) {
        loop_count = 0;
        spin_count = 1;
        do {
            if (loop_count < YIELD_THRESHOLD) {
                // Exponential-backoff busy spin.
                for (pause_cnt = spin_count; pause_cnt > 0; --pause_cnt) {
                    jimi_mm_pause(); // pause hint for hyper-threaded CPUs
                }
                spin_count *= 2;
            }
            else {
                yield_cnt = loop_count - YIELD_THRESHOLD;
                if ((yield_cnt & 63) == 63) {
                    jimi_sleep(1); // real sleep: drops into the kernel
                }
                else if ((yield_cnt & 3) == 3) {
                    jimi_sleep(0); // switch to an equal/higher priority thread, possibly on another core
                }
                else {
                    if (!jimi_yield()) { // yield to another thread on this CPU core only;
                                         // cannot switch to a thread waiting on another core
                        jimi_sleep(0); // nothing runnable on this core: try other cores
                                       // (only equal/better priority threads)
                    }
                }
            }
            loop_count++;
        } while (jimi_val_compare_and_swap32(&spin_mutex.locked, 0U, 1U) != 0U);
    }
    // --- inside the locked region ---
    head = m_ring_queue_core.m_ring_queue_head.head;
    tail = m_ring_queue_core.m_ring_queue_head.tail;
    if ((head - tail) > mask()) {
        Jimi_CompilerBarrier();
        // Queue is full: release the lock and report failure.
        spin_mutex.locked = 0;
        return -1;
    }
    next = head + 1;
    m_ring_queue_core.m_ring_queue_head.head = next;
    m_ring_queue_core.m_p_queue[head & mask()] = item; // store the item into its slot
    Jimi_CompilerBarrier(); // compiler read/write barrier
    spin_mutex.locked = 0; // release the lock
    return 0;
}
// Spin-lock push. Returns 0 on success, -1 when the queue is full — in
// that case the caller keeps ownership of `item`.
int class_ring_queue_base::spin2_push(class_ring_queue_item* item)
{
    index_type head, tail, next;
    int32_t pause_cnt;
    uint32_t loop_count, yield_cnt, spin_count;
    static const uint32_t YIELD_THRESHOLD = SPIN_YIELD_THRESHOLD;
    Jimi_CompilerBarrier();
    /* atomic_exchange usually takes less instructions than
       atomic_compare_and_exchange. On the other hand,
       atomic_compare_and_exchange potentially generates less bus traffic
       when the lock is locked.
       We assume that the first try mostly will be successful, and we use
       atomic_exchange. For the subsequent tries we use
       atomic_compare_and_exchange. */
    if (jimi_lock_test_and_set32(&spin_mutex.locked, 1U) != 0U) {
        loop_count = 0;
        spin_count = 1;
        do {
            if (loop_count < YIELD_THRESHOLD) {
                // Exponential-backoff busy spin before yielding.
                for (pause_cnt = spin_count; pause_cnt > 0; --pause_cnt) {
                    jimi_mm_pause();
                }
                spin_count *= 2;
            }
            else {
                yield_cnt = loop_count - YIELD_THRESHOLD;
#if defined(__MINGW32__) || defined(__CYGWIN__)
                if ((yield_cnt & 3) == 3) {
                    jimi_wsleep(0);
                }
                else {
                    if (!jimi_yield()) {
                        jimi_wsleep(0);
                        //jimi_mm_pause();
                    }
                }
#else
                // Escalating backoff: yield -> sleep(0) every 4th round ->
                // sleep(1) every 64th round.
                if ((yield_cnt & 63) == 63) {
                    jimi_wsleep(1);
                }
                else if ((yield_cnt & 3) == 3) {
                    jimi_wsleep(0);
                }
                else {
                    if (!jimi_yield()) {
                        jimi_wsleep(0);
                        //jimi_mm_pause();
                    }
                }
#endif
            }
            loop_count++;
            //jimi_mm_pause();
        } while (jimi_val_compare_and_swap32(&spin_mutex.locked, 0U, 1U) != 0U);
    }
    // --- inside the locked region ---
    head = m_ring_queue_core.m_ring_queue_head.head;
    tail = m_ring_queue_core.m_ring_queue_head.tail;
    if ((head - tail) > mask()) {
        Jimi_CompilerBarrier();
        // Queue is full: release the lock and report failure.
        spin_mutex.locked = 0;
        return -1;
    }
    next = head + 1;
    m_ring_queue_core.m_ring_queue_head.head = next;
    m_ring_queue_core.m_p_queue[head & mask()] = item;
    //Jimi_CompilerBarrier();
    //spin_mutex.locked = 0;
    // Release the lock with an atomic exchange (acts as a full barrier).
    // This thread holds the lock, so the previous value must be 1; the loop
    // body only fires if that invariant is somehow broken.
    while (jimi_lock_test_and_set32(&spin_mutex.locked, 0U) != 1U) {
        printf("spin2_push: jimi_lock_test_and_set32(&spin_mutex.locked, 0U) != 1U \n");
    }
    return 0;
}
// Spin-lock pop. Returns the oldest queued item, or NULL when the queue is
// empty. Ownership of the returned item transfers to the caller.
class_ring_queue_item* class_ring_queue_base::spin2_pop()
{
    index_type head, tail, next;
    value_type item;
    int32_t pause_cnt;
    uint32_t loop_count, yield_cnt, spin_count;
    static const uint32_t YIELD_THRESHOLD = SPIN_YIELD_THRESHOLD;
    Jimi_CompilerBarrier();
    /* atomic_exchange usually takes less instructions than
       atomic_compare_and_exchange. On the other hand,
       atomic_compare_and_exchange potentially generates less bus traffic
       when the lock is locked.
       We assume that the first try mostly will be successful, and we use
       atomic_exchange. For the subsequent tries we use
       atomic_compare_and_exchange. */
    if (jimi_lock_test_and_set32(&spin_mutex.locked, 1U) != 0U) {
        loop_count = 0;
        spin_count = 1;
        do {
            if (loop_count < YIELD_THRESHOLD) {
                // Exponential-backoff busy spin before yielding.
                for (pause_cnt = spin_count; pause_cnt > 0; --pause_cnt) {
                    jimi_mm_pause();
                }
                spin_count *= 2;
            }
            else {
                yield_cnt = loop_count - YIELD_THRESHOLD;
#if defined(__MINGW32__) || defined(__CYGWIN__)
                if ((yield_cnt & 3) == 3) {
                    jimi_wsleep(0);
                }
                else {
                    if (!jimi_yield()) {
                        jimi_wsleep(0);
                        //jimi_mm_pause();
                    }
                }
#else
                // Escalating backoff: yield -> sleep(0) every 4th round ->
                // sleep(1) every 64th round.
                if ((yield_cnt & 63) == 63) {
                    jimi_wsleep(1);
                }
                else if ((yield_cnt & 3) == 3) {
                    jimi_wsleep(0);
                }
                else {
                    if (!jimi_yield()) {
                        jimi_wsleep(0);
                        //jimi_mm_pause();
                    }
                }
#endif
            }
            loop_count++;
            //jimi_mm_pause();
        } while (jimi_val_compare_and_swap32(&spin_mutex.locked, 0U, 1U) != 0U);
    }
    // --- inside the locked region ---
    head = m_ring_queue_core.m_ring_queue_head.head;
    tail = m_ring_queue_core.m_ring_queue_head.tail;
    // Empty queue (tail == head) or an inconsistent snapshot.
    // NOTE(review): with the lock held, tail > head should be unreachable if
    // the push/pop invariants hold; the second clause is a defensive check.
    if ((tail == head) || (tail > head && (head - tail) > mask())) {
        Jimi_CompilerBarrier();
        //jimi_lock_test_and_set32(&spin_mutex.locked, 0U);
        spin_mutex.locked = 0;
        return (value_type)NULL;
    }
    next = tail + 1;
    m_ring_queue_core.m_ring_queue_head.tail = next;
    item = m_ring_queue_core.m_p_queue[tail & mask()];
    //Jimi_CompilerBarrier();
    //spin_mutex.locked = 0;
    // Release the lock with an atomic exchange (acts as a full barrier).
    // This thread holds the lock, so the previous value must be 1; the loop
    // body only fires if that invariant is somehow broken.
    while (jimi_lock_test_and_set32(&spin_mutex.locked, 0U) != 1U) {
        printf("spin2_pop: jimi_lock_test_and_set32(&spin_mutex.locked, 0U) != 1U \n");
    }
    return item;
}
// #endif // #ifdef linux
# ==============================================================================
# makefile
# lostspeed 2017-12-18
# note
# when first build on vmware's linux, please adjust date time for build project
# e.g. date -s "2017-12-5 21:49:00"
# ==============================================================================
BIN = ring_queue_ls
LINE80 = --------------------------------------------------------------------------------
CC = g++ -std=c++98
CFLAGS = -Wall -g
INC = -I. -I./helper -I./inc
LIBS = -lstdc++ -pthread
LIBPATH = /usr/local/lib

# FIX: the trailing '\' that used to end this line continued the assignment
# onto the next line, swallowing the DEPEND_CODE_SRC definition entirely
# (DEPEND_CODE_SRC/DEPEND_CODE_OBJ ended up empty/undefined).
DEPEND_CODE_DIR = ./have_not_depend_dir
DEPEND_CODE_SRC = $(shell find $(DEPEND_CODE_DIR) -name '*.cpp')
DEPEND_CODE_OBJ = $(DEPEND_CODE_SRC:.cpp=.o)

# root code dir is ./'s code, e.g. main.cpp
ROOT_CODE_SRC = $(shell find ./ -name '*.cpp')
ROOT_CODE_OBJ = $(ROOT_CODE_SRC:.cpp=.o)

# if no sub code dir, must fill a sub dir exist but empty. e.g. ./empty_dir
# if have sub code dir, fill it like DEPEND_CODE_DIR
# e.g. ./xx_subdir/xx_type/
SUB_CODE_DIR = ./helper ./inc
SUB_CODE_SRC = $(shell find $(SUB_CODE_DIR) -name '*.cpp')
SUB_CODE_OBJ = $(SUB_CODE_SRC:.cpp=.o)

# FIX: 'clean' was the first target, so a bare `make` deleted the build.
# 'all' is now the default; phony targets are declared as such.
.PHONY: all clean rebuild rebuild_and_run

all: $(BIN)
	@echo $(LINE80)
	@echo make all
	chmod 755 $(BIN)
	find . -name $(BIN)

clean:
	clear
	@echo $(LINE80)
	@echo make clean
	rm -f $(BIN) $(ROOT_CODE_OBJ) $(DEPEND_CODE_OBJ) $(SUB_CODE_OBJ)

# FIX: libraries (-pthread etc.) belong on the LINK line, not on the
# per-object compile line where they were silently ignored.
$(BIN): $(ROOT_CODE_OBJ) $(DEPEND_CODE_OBJ) $(SUB_CODE_OBJ)
	$(CC) $(CFLAGS) -o $@ $^ -L$(LIBPATH) $(LIBS)

.cpp.o:
	$(CC) -c $(CFLAGS) $< -o $@ $(INC)

rebuild:
	$(MAKE) clean
	@echo $(LINE80)
	$(MAKE) all

rebuild_and_run:
	$(MAKE) rebuild
	@echo $(LINE80)
	./$(BIN)