实验:环形队列

前言

有个linux cm, 线程A接收数据落地,线程B去遍历目录,再处理这些数据文件。linux中一般是这个套路吧。
当A接收的数据速率较高时,B吃不消了,导致数据文件堆积严重。
问题出在IO操作太费时间。就想在内存中捣鼓,不落地。
这个设想中的内存list, 因为要求高效率,尽可能不用锁。
去找无锁队列的资料,原理说的都挺简洁的,如果要写个强壮的无锁队列,心里没谱。
看到shines77大神在github上已经写完一个好用的无锁队列,断续的学习了几天。
今天将shines77的工程代码整理了一个demo, 明天丢在工程里,配上业务逻辑用用。

shines77的工程已经严谨地用多线程测试过了,我只整理了压入弹出的接口的用法。在实际工程中再详细测试。
shines77的工程是用模板写的,我要丢进去的工程,有个同事也在维护,他java是主业,C用的不细,要求不能用模板实现,我就将shines77的代码改回C实现。

用模板写是灵活,对于队列,压入的都是结构指针。只要压入的那个指针类搞成模板就行。队列本身不用模板来做,是可以的。

我怕同事喷我,那个要压入的数据类,也用C写。代码中见不得模板:)
其实,模板用起来灵活,调试起来方便,看着也清晰,为啥不试着用一下呢? 多见几次面,看着就舒服了。

环形队列的条目数,要从配置文件中来,将构造函数改了下,不用宏了,将条目数传进构造函数。

shines77原工程各个平台都能编译运行,我只要linux和win下能跑. 跨平台的代码不用动。

工程下载点

ring_queue_ls_2017_1219_0228.7z
linux和win下编译,运行通过。

实验

运行效果

--------------------------------------------------------------------------------
test_ring_queue_ls 1.0.0.1 2017-12-18 10:51
--------------------------------------------------------------------------------
argv[0] = ./ring_queue_ls
my_ringQueue.capacity() = 1048576
m_i_len_data = 32
74 68 69 73 20 69 73 20 
69 74 65 6D 00 00 00 00 
00 00 00 00 00 00 00 00 
00 00 00 00 00 00 00 00 
spin2_push
spin2_pop
m_i_len_data = 32
74 68 69 73 20 69 73 20 
69 74 65 6D 00 00 00 00 
00 00 00 00 00 00 00 00 
00 00 00 00 00 00 00 00 
END
root@debian750devmin:/home/dev/linux# 

整理后的demo预览

// @file main.cpp
// @note
// set date on linux env :  date -s "2017-10-29 12:08:00"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#include "helper_system.h"

#include "ring_queue.h"

#define PROG_NAME "test_ring_queue_ls"
#define PROG_VER "1.0.0.1"
#define PROG_MODIFY_TIME "2017-12-18 10:51"

#define LINE80 "--------------------------------------------------------------------------------"

void version();
void help();

int main(int argc, char** argv)
{
    os_cmd_clear_screen();
    version();
    show_cmdline(argc, argv);

    // The third class_ring_queue parameter is the capacity exponent
    // (capacity = 2^bits entries); it can come from a config file.
    class_ring_queue my_ringQueue(true, true, CAPACITY_POW2_BITS); // 2^20 = 1048576 entries
    if (!my_ringQueue.is_valid()) {
        printf("maybe ring queue unit numbers too large, please make small ring_queue\n");
    }

    class_ring_queue_item* p_item = NULL;
    class_ring_queue_item* p_item_was_pop = NULL;

    unsigned int ui_list_capacity = my_ringQueue.capacity();
    printf("my_ringQueue.capacity() = %ld\n", (long)ui_list_capacity);

    // Only the push/pop interface usage is demonstrated here; multithreaded
    // stress testing is done in the real project (per the upstream author).
    do {
        // If the queue's big slot array could not be allocated, bail out.
        if (!my_ringQueue.is_valid()) {
            break;
        }

        // Array-new so the pointer matches the delete[] applied after pop.
        p_item = new class_ring_queue_item[1];
        if (NULL != p_item) {  // NOTE(review): plain new throws rather than returning NULL
            p_item->clear();
            p_item->set_data(new unsigned char[0x20], 0x20);
            strcpy((char*)p_item->data(), "this is item");
            p_item->show();

            // ring queue push
            printf("spin2_push\n");
            if (0 != my_ringQueue.spin2_push(p_item)) {
                // BUG FIX: the push result was ignored. On failure (queue
                // full, returns -1) the queue did NOT take the item, so it
                // must be released here to avoid a leak.
                printf("spin2_push failed, queue is full\n");
                delete [] p_item;
                p_item = NULL;
                break;
            }

            // ring queue pop
            printf("spin2_pop\n");
            p_item_was_pop = my_ringQueue.spin2_pop();
            if (NULL != p_item_was_pop) {
                p_item_was_pop->show();
                delete [] p_item_was_pop;
            }
        }
    } while (0);

    printf("END\n");
    return EXIT_SUCCESS;
}

// Print the program banner: name, version and last-modified timestamp,
// framed by 80-column separator lines.
void version()
{
    printf("%s\n", LINE80);
    printf("%s %s %s\n", PROG_NAME, PROG_VER, PROG_MODIFY_TIME);
    printf("%s\n", LINE80);
}

// Print a minimal usage message framed by separator lines.
void help()
{
    printf("%s\n", LINE80);
    printf("usage:\n");
    printf("%s\n", PROG_NAME);
    printf("%s\n", LINE80);
}
// @file ring_queue.h
//
// Declaration of class_ring_queue: a fixed-capacity ring queue of
// class_ring_queue_item pointers, built on class_ring_queue_base.

// NOTE(review): guard renamed from __RING_QUEUE_H_V1__ -- identifiers with a
// leading double underscore are reserved for the implementation.
#ifndef RING_QUEUE_H_V1
#define RING_QUEUE_H_V1

// #ifdef linux

#include "ring_queue_define.h"
#include "ring_queue_item.h"

class class_ring_queue : public class_ring_queue_base
{
public:
    // bFillQueue:           zero-fill the slot array after allocation
    // bInitHead:            memset the whole head structure (padding included)
    // i_capacity_pow2_bits: capacity exponent; capacity = 2^bits entries
    class_ring_queue(bool bFillQueue, bool bInitHead, int i_capacity_pow2_bits);
    ~class_ring_queue();

public:
    // Dump head/tail indices for debugging.
    virtual void show();

protected:
    // Allocate the slot array; on failure is_valid() stays false.
    void init_queue(bool bFillQueue = true);
};

// #endif // #ifdef linux

#endif // #ifndef RING_QUEUE_H_V1
// @file ring_queue_item.h
//
// Queue item (owned byte buffer), the padded spin mutex / head structures,
// and the base queue engine used by class_ring_queue.

// NOTE(review): guard renamed from __RING_QUEUE_ITEM_H_V1__ -- double
// underscore identifiers are reserved for the implementation.
#ifndef RING_QUEUE_ITEM_H_V1
#define RING_QUEUE_ITEM_H_V1

// #ifdef linux

// NOTE(review): the original include targets were lost in formatting; these
// are the standard headers the code below needs (uint32_t, NULL, printf,
// memset) -- confirm against the original project.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#include "port.h"
#include "sleep.h"

#ifdef WIN32
#include "msvc/pthread.h"
#else
#include <pthread.h>
#endif // #ifdef WIN32

#include "ring_queue_define.h"

// Spin lock padded so `locked` and the bookkeeping counters sit on separate
// cache lines (avoids false sharing between contending threads).
struct spin_mutex_t
{
    volatile char padding1[CACHE_LINE_SIZE];
    volatile uint32_t locked;                   // 0 = free, 1 = held
    volatile char padding2[CACHE_LINE_SIZE - 1 * sizeof(uint32_t)];
    volatile uint32_t spin_counter;
    volatile uint32_t recurse_counter;
    volatile uint32_t thread_id;
    volatile uint32_t reserve;
    volatile char padding3[CACHE_LINE_SIZE - 4 * sizeof(uint32_t)];
};

// Producer (head) and consumer (tail) indices, each on its own cache line.
struct RingQueueHead
{
    volatile uint32_t head;
    char padding1[JIMI_CACHELINE_SIZE - sizeof(uint32_t)];

    volatile uint32_t tail;
    char padding2[JIMI_CACHELINE_SIZE - sizeof(uint32_t)];
};

// One queue entry: an owned byte buffer plus its length.
class class_ring_queue_item {
public:
    class_ring_queue_item();
    virtual ~class_ring_queue_item();

    // Attach a buffer; the item takes ownership (clear()/dtor delete[] it).
    void set_data(unsigned char* p_data, int i_len_data);
    unsigned char* data();
    int length();
    void clear();
    void show();            // hex dump of the payload, 8 bytes per line

private:
    unsigned char* m_p_data;   // owned payload buffer (may be NULL)
    int m_i_len_data;          // payload length in bytes
};

// Storage core: head/tail indices plus the heap-allocated slot array.
class RingQueueCore
{
public:
    typedef class_ring_queue_item*  item_type;

    RingQueueCore()
    {
        m_p_queue = NULL;
    }
    virtual ~RingQueueCore() {}

public:
    static const bool kIsAllocOnHeap = true;
    // The queue is usable only once the slot array was allocated.
    bool is_valid() { return (NULL != m_p_queue); }

public:
    RingQueueHead   m_ring_queue_head;
    item_type*      m_p_queue;      // array of capacity() item pointers
};

// Queue engine: capacity/mask bookkeeping and the spin-lock-guarded
// push/pop operations.
class class_ring_queue_base
{
public:
    typedef uint32_t                size_type;
    typedef uint32_t                index_type;
    typedef RingQueueCore           core_type;
    typedef class_ring_queue_item*  value_type;

public:
    size_type  m_nCapacity;   // always a power of two
    index_type m_nMask;       // m_nCapacity - 1

public:
    class_ring_queue_base(bool bInitHead, int i_capacity_pow2_bits);
    ~class_ring_queue_base();

public:
    virtual void show();

    bool is_valid() { return m_ring_queue_core.is_valid(); }
    index_type mask() const      { return m_nMask; };
    size_type capacity() const   { return m_nCapacity; };
    size_type length() const     { return sizes(); };
    size_type sizes() const;

    void init(bool bInitHead, int i_capacity_pow2_bits);

    // Returns 0 on success, -1 when the queue is full.
    int spin2_push(class_ring_queue_item* item);
    // Returns NULL when the queue is empty.
    class_ring_queue_item* spin2_pop();

    int spin2_push_(class_ring_queue_item* item);

protected:
    core_type       m_ring_queue_core;
    spin_mutex_t    spin_mutex;
    pthread_mutex_t queue_mutex;    // initialized in init(); not locked by the spin2_* paths shown
};

// #endif // #ifdef linux

#endif // #ifndef RING_QUEUE_ITEM_H_V1
// @file ring_queue_define.h
//
// Compile-time tuning constants for the ring queue.

// NOTE(review): guard renamed from __RING_QUEUE_DEFINE_H_V1__ -- double
// underscore identifiers are reserved for the implementation.
#ifndef RING_QUEUE_DEFINE_H_V1
#define RING_QUEUE_DEFINE_H_V1

// #ifdef linux

/// CPU cache line size (64 bytes on x86).
#define CACHE_LINE_SIZE         64

// BUG FIX: the original tested JIMI_CACHE_LINE_SIZE but defined
// JIMI_CACHELINE_SIZE; the guard must test the same macro it defines,
// otherwise a prior external definition of the (differently spelled) macro
// would leave JIMI_CACHELINE_SIZE undefined.
#ifndef JIMI_CACHELINE_SIZE
#define JIMI_CACHELINE_SIZE    64
#endif

/// RingQueue capacity (2^CAPACITY_POW2_BITS entries; must be a power of two).
#define CAPACITY_POW2_BITS 20 // 2^20 = 1048576
#define CAPACITY_POW2_BITS_MIN 8 // 2^8 = 256; smaller requests are clamped up to 256 entries

/// Do not modify: qmask = qsize - 1.
/// NOTE(review): QSIZE is not defined in this header; QMASK only expands
/// correctly in translation units where QSIZE is in scope.
#define QMASK                   (QSIZE - 1)

///
/// Whether spin_mutex maintains spin_counter: 0 = no (faster, recommended), 1 = yes.
///
#define USE_SPIN_MUTEX_COUNTER  0

///
/// Maximum spin count for spin_mutex. Default 16; 0, 1 or 2 recommended (faster).
/// 0 is equivalent to USE_SPIN_MUTEX_COUNTER == 0.
///
#define MUTEX_MAX_SPIN_COUNT    1

#define SPIN_YIELD_THRESHOLD    1

// #endif // #ifdef linux

#endif // #ifndef RING_QUEUE_DEFINE_H_V1
// @file ring_queue.cpp

#include <stdio.h>   // printf
#include <string.h>  // memset
#include "ring_queue.h"

// #ifdef linux

// Build the ring queue: the base class sets up head/tail and the mutexes,
// then init_queue() allocates the slot array.
class_ring_queue::class_ring_queue(bool bFillQueue, bool bInitHead, int i_capacity_pow2_bits)
    : class_ring_queue_base(bInitHead, i_capacity_pow2_bits)
{
    init_queue(bFillQueue);
}

// Release the slot array if it lives on the system heap.
class_ring_queue::~class_ring_queue()
{
    if (RingQueueCore::kIsAllocOnHeap && NULL != this->m_ring_queue_core.m_p_queue) {
        delete[] this->m_ring_queue_core.m_p_queue;
        this->m_ring_queue_core.m_p_queue = NULL;
    }
}

// Allocate the slot array (capacity() pointers). On failure m_p_queue stays
// NULL, so is_valid() reports the queue as unusable.
void class_ring_queue::init_queue(bool bFillQueue /* = true */)
{
    // BUG FIX: plain operator new throws std::bad_alloc instead of returning
    // NULL, so the original NULL check was dead code and an allocation
    // failure would have propagated as an exception. Catch it here so the
    // caller's is_valid() check works as intended.
    value_type *newData = NULL;
    try {
        newData = new class_ring_queue_item*[capacity()];
    }
    catch (...) {
        newData = NULL;
    }
    if (newData != NULL) {
        if (bFillQueue) {
            // Zero every slot so pop never sees a garbage pointer.
            memset((void *)newData, 0, sizeof(value_type) * capacity());
        }
        this->m_ring_queue_core.m_p_queue = newData;
    }
}

void class_ring_queue::show()
{
    printf("class_ring_queue: (head = %u, tail = %u)\n",
        this->m_ring_queue_core.m_ring_queue_head.head, this->m_ring_queue_core.m_ring_queue_head.tail);
}

// #endif // #ifdef linux
// @file ring_queue_item.cpp

#include <stdio.h>   // printf
#include <string.h>  // memset
#include "ring_queue_item.h"

// #ifdef linux

// --------------------------------------------------------------------------------
// class_ring_queue_item
// --------------------------------------------------------------------------------

// Start with no payload attached (equivalent to set_data(NULL, 0)).
class_ring_queue_item::class_ring_queue_item()
{
    m_p_data = NULL;
    m_i_len_data = 0;
}

// The item owns its payload buffer once set_data() attached one;
// clear() releases it.
class_ring_queue_item::~class_ring_queue_item()
{
    clear();
}

// Release the payload buffer (if any) and reset to the empty state.
// Safe to call repeatedly.
void class_ring_queue_item::clear()
{
    // delete[] on a NULL pointer is a no-op, so no guard is needed.
    delete[] m_p_data;

    // Resets both m_p_data and m_i_len_data. The original also NULLed
    // m_p_data manually before calling set_data(NULL, 0) -- the same state
    // was written twice; once is enough.
    set_data(NULL, 0);
}

// Attach a payload buffer; ownership transfers to the item (clear() and the
// destructor delete[] it). NOTE(review): a previously attached buffer is NOT
// released here -- the constructor calls this on an uninitialized member, so
// freeing would be unsafe; callers must clear() first or the old buffer leaks.
void class_ring_queue_item::set_data(unsigned char* p_data, int i_len_data)
{
    m_i_len_data = i_len_data;
    m_p_data = p_data;
}

// Raw pointer to the payload buffer (NULL when no payload is attached).
unsigned char* class_ring_queue_item::data()
{
    return m_p_data;
}

// Payload length in bytes (0 when no payload is attached).
int class_ring_queue_item::length()
{
    return m_i_len_data;
}

// Hex-dump the payload, 8 bytes per line (newline before bytes 8, 16, ...).
void class_ring_queue_item::show()
{
    printf("m_i_len_data = %d\n", m_i_len_data);

    // The original tracked a separate i_pos counter that it reset every
    // 8 bytes; `i % 8` produces the identical output.
    for (int i = 0; i < m_i_len_data; i++) {
        if ((i != 0) && (i % 8 == 0)) {
            printf("\n");
        }
        printf("%2.2X ", m_p_data[i]);
    }

    printf("\n");
}

// --------------------------------------------------------------------------------
// class_ring_queue_base
// --------------------------------------------------------------------------------

// All real setup lives in init() so the logic stays in one place.
class_ring_queue_base::class_ring_queue_base(bool bInitHead, int i_capacity_pow2_bits)
{
    init(bInitHead, i_capacity_pow2_bits);
}

class_ring_queue_base::~class_ring_queue_base()
{
    // Ensure the release of the spin lock is not reordered by the compiler
    // before tearing down the pthread mutex.
    Jimi_WriteCompilerBarrier();
    spin_mutex.locked = 0;
    pthread_mutex_destroy(&queue_mutex);
}

// Compute capacity/mask, reset the head/tail indices, and initialize both
// the spin mutex and the fallback pthread mutex. Statement order matters:
// the compiler barrier separates index setup from lock-state writes.
void class_ring_queue_base::init(bool bInitHead, int i_capacity_pow2_bits)
{
    // Capacity is rounded up to a power of two and never drops below
    // 2^CAPACITY_POW2_BITS_MIN entries.
    m_nCapacity = (size_type)JIMI_MAX(JIMI_ROUND_TO_POW2((1 << i_capacity_pow2_bits)), JIMI_ROUND_TO_POW2((1 << CAPACITY_POW2_BITS_MIN)));
    m_nMask     = (index_type)(m_nCapacity - 1);

    // Either branch leaves head == tail == 0; the memset variant also zeroes
    // the padding bytes of the head structure.
    if (!bInitHead) {
        m_ring_queue_core.m_ring_queue_head.head = 0;
        m_ring_queue_core.m_ring_queue_head.tail = 0;
    }
    else {
        memset((void *)&m_ring_queue_core.m_ring_queue_head, 0, sizeof(m_ring_queue_core.m_ring_queue_head));
    }

    Jimi_CompilerBarrier();

    // Initialize the spin mutex: starts unlocked.
    spin_mutex.locked = 0;
    spin_mutex.spin_counter = MUTEX_MAX_SPIN_COUNT;
    spin_mutex.recurse_counter = 0;
    spin_mutex.thread_id = 0;
    spin_mutex.reserve = 0;

    // Initialize the fallback pthread mutex.
    pthread_mutex_init(&queue_mutex, NULL);
}

// Dump the current head/tail indices for debugging.
void class_ring_queue_base::show()
{
    printf("class_ring_queue_base: (head = %u, tail = %u)\n",
        m_ring_queue_core.m_ring_queue_head.head,
        m_ring_queue_core.m_ring_queue_head.tail);
}

// Number of items currently in the queue, computed from a snapshot of head
// and tail. Returns (size_type)-1 when the snapshot is inconsistent
// (head - tail exceeds the mask, possible while racing concurrent writers).
class_ring_queue_base::size_type
class_ring_queue_base::sizes() const
{
    Jimi_CompilerBarrier();

    index_type head = m_ring_queue_core.m_ring_queue_head.head;
    index_type tail = m_ring_queue_core.m_ring_queue_head.tail;

    // BUG FIX (latent): the original wrote
    //   (size_type)((head - tail) <= mask()) ? (head - tail) : (size_type)-1
    // which casts the *comparison* (0 or 1) and uses that as the ternary
    // condition. The observable result happened to be identical, but the
    // parenthesization below is what was clearly intended.
    return ((head - tail) <= mask()) ? (size_type)(head - tail) : (size_type)-1;
}

// Locked push: acquire the spin lock, store `item` at the head slot and
// advance head. Returns 0 on success, -1 when the queue is full.
int class_ring_queue_base::spin2_push_(class_ring_queue_item* item)
{
    index_type head, tail, next;
    int32_t pause_cnt;
    uint32_t loop_count, yield_cnt, spin_count;
    static const uint32_t YIELD_THRESHOLD = 1;  // spin iterations before starting to yield

    Jimi_CompilerBarrier();    // compiler read/write barrier

                               // Lock acquisition trick borrowed from pthread_spin_lock():
                               // try one cheap atomic exchange first, then spin.
    if (jimi_lock_test_and_set32(&spin_mutex.locked, 1U) != 0U) {
        loop_count = 0;
        spin_count = 1;
        do {
            if (loop_count < YIELD_THRESHOLD) {
                // Busy-wait with exponential backoff.
                for (pause_cnt = spin_count; pause_cnt > 0; --pause_cnt) {
                    jimi_mm_pause();        // pause hint for hyper-threaded CPUs
                }
                spin_count *= 2;
            }
            else {
                yield_cnt = loop_count - YIELD_THRESHOLD;
                if ((yield_cnt & 63) == 63) {
                    jimi_sleep(1);          // real sleep: drop into the kernel
                }
                else if ((yield_cnt & 3) == 3) {
                    jimi_sleep(0);          // switch to a thread of the same or higher
                                            // priority, possibly on another CPU core
                }
                else {
                    if (!jimi_yield()) {    // yield to another thread on this CPU core only;
                                            // cannot hand over to threads waiting on other cores
                        jimi_sleep(0);      // nothing runnable on this core: try other cores
                                            // (same or higher priority only)
                    }
                }
            }
            loop_count++;
        } while (jimi_val_compare_and_swap32(&spin_mutex.locked, 0U, 1U) != 0U);
    }

    // --- critical section: lock held from here ---
    head = m_ring_queue_core.m_ring_queue_head.head;
    tail = m_ring_queue_core.m_ring_queue_head.tail;
    if ((head - tail) > mask()) {
        Jimi_CompilerBarrier();
        // Queue full: release the lock and report failure.
        spin_mutex.locked = 0;
        return -1;
    }
    next = head + 1;
    m_ring_queue_core.m_ring_queue_head.head = next;

    m_ring_queue_core.m_p_queue[head & mask()] = item;    // store the item in its slot

    Jimi_CompilerBarrier();        // compiler read/write barrier

    spin_mutex.locked = 0;          // release the lock

    return 0;
}

// Push `item` under the spin lock. Returns 0 on success, -1 when the queue
// is full. Same algorithm as spin2_push_() plus MinGW/Cygwin-specific
// yielding and an exchange-based unlock.
int class_ring_queue_base::spin2_push(class_ring_queue_item* item)
{
    index_type head, tail, next;
    int32_t pause_cnt;
    uint32_t loop_count, yield_cnt, spin_count;
    static const uint32_t YIELD_THRESHOLD = SPIN_YIELD_THRESHOLD;

    Jimi_CompilerBarrier();

    /* atomic_exchange usually takes less instructions than
    atomic_compare_and_exchange.  On the other hand,
    atomic_compare_and_exchange potentially generates less bus traffic
    when the lock is locked.
    We assume that the first try mostly will be successful, and we use
    atomic_exchange.  For the subsequent tries we use
    atomic_compare_and_exchange.  */
    if (jimi_lock_test_and_set32(&spin_mutex.locked, 1U) != 0U) {
        loop_count = 0;
        spin_count = 1;
        do {
            if (loop_count < YIELD_THRESHOLD) {
                // Busy-wait with exponential backoff.
                for (pause_cnt = spin_count; pause_cnt > 0; --pause_cnt) {
                    jimi_mm_pause();
                }
                spin_count *= 2;
            }
            else {
                yield_cnt = loop_count - YIELD_THRESHOLD;
#if defined(__MINGW32__) || defined(__CYGWIN__)
                // MinGW/Cygwin: jimi_wsleep(1) reportedly behaves poorly, so
                // only yield / sleep(0) here.
                if ((yield_cnt & 3) == 3) {
                    jimi_wsleep(0);
                }
                else {
                    if (!jimi_yield()) {
                        jimi_wsleep(0);
                        //jimi_mm_pause();
                    }
                }
#else
                // Every 64th yield round, sleep for real; every 4th, give up
                // the time slice; otherwise just yield on this core.
                if ((yield_cnt & 63) == 63) {
                    jimi_wsleep(1);
                }
                else if ((yield_cnt & 3) == 3) {
                    jimi_wsleep(0);
                }
                else {
                    if (!jimi_yield()) {
                        jimi_wsleep(0);
                        //jimi_mm_pause();
                    }
                }
#endif
            }
            loop_count++;
            //jimi_mm_pause();
        } while (jimi_val_compare_and_swap32(&spin_mutex.locked, 0U, 1U) != 0U);
    }

    // --- critical section: lock held from here ---
    head = m_ring_queue_core.m_ring_queue_head.head;
    tail = m_ring_queue_core.m_ring_queue_head.tail;
    if ((head - tail) > mask()) {
        Jimi_CompilerBarrier();
        // Queue full: release the lock and report failure.
        spin_mutex.locked = 0;
        return -1;
    }
    next = head + 1;
    m_ring_queue_core.m_ring_queue_head.head = next;

    m_ring_queue_core.m_p_queue[head & mask()] = item;

    //Jimi_CompilerBarrier();
    //spin_mutex.locked = 0;
    // NOTE(review): unlock via atomic exchange. The old value should always
    // be 1 here (this thread holds the lock), so the loop normally runs
    // exactly once; reading 0 would indicate corrupted lock state and this
    // would spin, logging each iteration.
    while (jimi_lock_test_and_set32(&spin_mutex.locked, 0U) != 1U) {
        printf("spin2_push: jimi_lock_test_and_set32(&spin_mutex.locked, 0U) != 1U \n");
    }

    return 0;
}

// Pop one item under the spin lock. Returns the item pointer, or NULL when
// the queue is empty (or the head/tail snapshot is inconsistent).
class_ring_queue_item* class_ring_queue_base::spin2_pop()
{
    index_type head, tail, next;
    value_type item;
    int32_t pause_cnt;
    uint32_t loop_count, yield_cnt, spin_count;
    static const uint32_t YIELD_THRESHOLD = SPIN_YIELD_THRESHOLD;

    Jimi_CompilerBarrier();

    /* atomic_exchange usually takes less instructions than
    atomic_compare_and_exchange.  On the other hand,
    atomic_compare_and_exchange potentially generates less bus traffic
    when the lock is locked.
    We assume that the first try mostly will be successful, and we use
    atomic_exchange.  For the subsequent tries we use
    atomic_compare_and_exchange.  */
    if (jimi_lock_test_and_set32(&spin_mutex.locked, 1U) != 0U) {
        loop_count = 0;
        spin_count = 1;
        do {
            if (loop_count < YIELD_THRESHOLD) {
                // Busy-wait with exponential backoff.
                for (pause_cnt = spin_count; pause_cnt > 0; --pause_cnt) {
                    jimi_mm_pause();
                }
                spin_count *= 2;
            }
            else {
                yield_cnt = loop_count - YIELD_THRESHOLD;
#if defined(__MINGW32__) || defined(__CYGWIN__)
                // MinGW/Cygwin: only yield / sleep(0) here.
                if ((yield_cnt & 3) == 3) {
                    jimi_wsleep(0);
                }
                else {
                    if (!jimi_yield()) {
                        jimi_wsleep(0);
                        //jimi_mm_pause();
                    }
                }
#else
                // Every 64th yield round, sleep for real; every 4th, give up
                // the time slice; otherwise just yield on this core.
                if ((yield_cnt & 63) == 63) {
                    jimi_wsleep(1);
                }
                else if ((yield_cnt & 3) == 3) {
                    jimi_wsleep(0);
                }
                else {
                    if (!jimi_yield()) {
                        jimi_wsleep(0);
                        //jimi_mm_pause();
                    }
                }
#endif
            }
            loop_count++;
            //jimi_mm_pause();
        } while (jimi_val_compare_and_swap32(&spin_mutex.locked, 0U, 1U) != 0U);
    }

    // --- critical section: lock held from here ---
    head = m_ring_queue_core.m_ring_queue_head.head;
    tail = m_ring_queue_core.m_ring_queue_head.tail;
    // Empty queue (tail == head) or an inconsistent snapshot: nothing to pop.
    // NOTE(review): with unsigned indices, `tail > head` plus the mask check
    // guards against a torn read; both paths release the lock and return NULL.
    if ((tail == head) || (tail > head && (head - tail) > mask())) {
        Jimi_CompilerBarrier();
        //jimi_lock_test_and_set32(&spin_mutex.locked, 0U);
        spin_mutex.locked = 0;
        return (value_type)NULL;
    }
    next = tail + 1;
    m_ring_queue_core.m_ring_queue_head.tail = next;

    item = m_ring_queue_core.m_p_queue[tail & mask()];

    //Jimi_CompilerBarrier();
    //spin_mutex.locked = 0;
    // NOTE(review): unlock via atomic exchange; the old value should always
    // be 1 (this thread holds the lock), so the loop normally runs once.
    while (jimi_lock_test_and_set32(&spin_mutex.locked, 0U) != 1U) {
        printf("spin2_pop: jimi_lock_test_and_set32(&spin_mutex.locked, 0U) != 1U \n");
    }

    return item;
}

// #endif // #ifdef linux
# ==============================================================================
# makefile
#   lostspeed 2017-12-18
# note
#   when first build on vmware's linux, please adjust date time for build project
#   e.g. date -s "2017-12-5 21:49:00"
# ==============================================================================
BIN = ring_queue_ls
LINE80 = --------------------------------------------------------------------------------
CC = g++ -std=c++98
CFLAGS = -Wall -g
INC = -I. -I./helper -I./inc
LIBS = -lstdc++ -pthread
LIBPATH = /usr/local/lib

DEPEND_CODE_DIR = ./have_not_depend_dir \

DEPEND_CODE_SRC = $(shell find $(DEPEND_CODE_DIR) -name '*.cpp')
DEPEND_CODE_OBJ = $(DEPEND_CODE_SRC:.cpp=.o)

# root code dir is ./'s code, e.g. main.cpp
# BUG FIX: -maxdepth 1 keeps the recursive find from also collecting the
# sources under ./helper and ./inc, which would duplicate objects on the
# link line (multiple-definition errors).
ROOT_CODE_SRC = $(shell find ./ -maxdepth 1 -name '*.cpp')
ROOT_CODE_OBJ = $(ROOT_CODE_SRC:.cpp=.o)

# if no sub code dir, must fill a sub dir exist but empty. e.g. ./empty_dir
# if have sub code dir, fill it like DEPEND_CODE_DIR
# e.g. ./xx_subdir/xx_type/
SUB_CODE_DIR = ./helper ./inc
SUB_CODE_SRC = $(shell find $(SUB_CODE_DIR) -name '*.cpp')
SUB_CODE_OBJ = $(SUB_CODE_SRC:.cpp=.o)

# BUG FIX: `all` is now the first target so a plain `make` builds the
# program; previously `clean` came first and was the default goal, so a
# bare `make` wiped the build instead of compiling.
all:$(BIN)
	@echo $(LINE80)
	@echo make all
	chmod 777 $(BIN)
	find . -name $(BIN)

clean:
	clear
	@echo $(LINE80)
	@echo make clean
	rm -f $(BIN) $(ROOT_CODE_OBJ) $(DEPEND_CODE_OBJ) $(SUB_CODE_OBJ)

# Link step: libraries belong here (previously they were passed to the
# compile-only rule, where -l/-L are ignored, and omitted from the link).
$(BIN) : $(ROOT_CODE_OBJ) $(DEPEND_CODE_OBJ) $(SUB_CODE_OBJ)
	$(CC) $(CFLAGS) -o $@ $^ -L$(LIBPATH) $(LIBS)

# Compile rule: $< is the single source prerequisite ($^ would expand to
# every prerequisite, which breaks with -c if a rule gains extra deps).
.cpp.o:
	$(CC) -c $(CFLAGS) $< -o $@ $(INC)

rebuild:
	make clean
	@echo $(LINE80)
	make all

rebuild_and_run:
	make rebuild
	@echo $(LINE80)
	./$(BIN)
你可能感兴趣的:(实验:环形队列)