A blocking queue implemented with a mutex and a condition variable makes every handoff go through the synchronization mechanism, so threads end up contending for the queue.
volatile int a;
a = 1;

// This will loop while 'a' is not equal to 1.
// If it is equal to 1, the operation will atomically set a to 2 and return true.
while (!CAS(&a, 1, 2))
{
    ;
}
If the queue uses dynamically allocated nodes, then for a high-performance queue:
① every enqueue dynamically allocates memory for one node;
② every dequeue frees one node's memory.
This is bound to hurt the performance of a high-performance server.
Dynamic memory allocation deserves careful thought in a multithreaded system. When a task allocates memory from the heap, the standard allocator blocks every other task that shares its address space (i.e. all threads of the process). The reason is that this keeps the allocator simple, and it works well: two threads can never be handed the same address, because they cannot execute allocation requests at the same time. Obviously, though, threads that allocate frequently will drag application performance down (note that inserting into a standard queue or map also triggers dynamic allocation on the heap).
For reference, zmq encapsulates the required atomic pointer operations as follows:
// This class encapsulates several atomic operations on pointers.
template <typename T> class atomic_ptr_t
{
public:
    inline void set (T *ptr_);        // non-atomic: simply stores/updates the value
    inline T *xchg (T *val_);         // atomic: stores a new value and returns the old one (exchange)
    inline T *cas (T *cmp_, T *val_); // atomic: compares the current value with cmp_; if equal, stores val_, otherwise leaves it unchanged
private:
    volatile T *ptr;
};
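For illustration, here is a minimal sketch of one possible backend for these three operations using the GCC/Clang __sync builtins. This is an assumption made for clarity, not the actual zmq source, which selects among several platform-specific implementations:

// Sketch only: a GCC/Clang backend for the atomic pointer interface above.
template <typename T> class atomic_ptr_sketch_t
{
public:
    atomic_ptr_sketch_t () : ptr (NULL) {}

    // Non-atomic: plain store; only safe when no other thread touches ptr.
    inline void set (T *ptr_) { ptr = ptr_; }

    // Atomic exchange: store val_ and return the previous pointer.
    inline T *xchg (T *val_)
    {
        return (T *) __sync_lock_test_and_set (&ptr, val_);
    }

    // Atomic compare-and-swap: if ptr == cmp_, replace it with val_;
    // either way, return the value ptr held before the operation.
    inline T *cas (T *cmp_, T *val_)
    {
        return (T *) __sync_val_compare_and_swap (&ptr, cmp_, val_);
    }

private:
    volatile T *ptr;
};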
1) Locality principle: under normal operation the number of chunks stays stable, fluctuating by only +1 or -1, so caching a single spare chunk is enough (see the sketch after this list).
2) If you try to keep several spare chunks, the atomicity of the chunk cache itself becomes hard to manage; you would have to either
- take a lock (which defeats the purpose and is basically never done), or
- design yet another lock-free queue just to manage the recycled chunks.
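The single-spare-chunk idea is what zmq's yqueue relies on. Below is a rough, illustrative sketch of it built on the atomic pointer interface above; the names alloc_chunk/free_chunk/spare_chunk are hypothetical, not the verbatim zmq source:

// Illustrative only: cache at most one recycled chunk in an atomic pointer.
struct chunk_t { /* N elements plus prev/next chunk pointers */ };

atomic_ptr_t<chunk_t> spare_chunk;      // holds at most one recycled chunk

chunk_t *alloc_chunk ()
{
    // Try to reuse the cached chunk first; xchg leaves NULL behind.
    chunk_t *sc = spare_chunk.xchg (NULL);
    return sc ? sc : new chunk_t ();
}

void free_chunk (chunk_t *ch)
{
    // Keep this chunk as the spare if the slot is empty, otherwise free it.
    chunk_t *old = spare_chunk.cas (NULL, ch);
    if (old != NULL)
        delete ch;                      // slot already holds another spare
}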
test4 involves memory barriers (fences) and a variety of related techniques.
Locking is a pessimistic strategy: it assumes that every access to the shared resource will conflict, so it prefers to sacrifice performance (time) to guarantee data safety.
Lock-free is an optimistic strategy: it assumes that threads accessing the shared resource will not conflict, so no lock is taken and threads keep running without stopping. When a conflict does occur, the current operation is simply retried until it succeeds. The sketch below makes the contrast concrete.
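As a hedged illustration of the two strategies on a shared counter (C++11 std::atomic and std::mutex; not code from the original text):

#include <atomic>
#include <mutex>

std::mutex mtx;
long locked_counter = 0;

// Pessimistic: always take the lock, assuming a conflict could happen.
void add_locked (long n)
{
    std::lock_guard<std::mutex> guard (mtx);
    locked_counter += n;
}

std::atomic<long> lockfree_counter {0};

// Optimistic: read, compute, CAS; retry only if another thread interfered.
void add_lockfree (long n)
{
    long expected = lockfree_counter.load ();
    while (!lockfree_counter.compare_exchange_weak (expected, expected + n))
    {
        // on failure, expected is reloaded with the current value; just retry
    }
}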
****
In fact CAS has another well-known failure mode, the ABA problem. The current thread expects the value A; one thread changes A to B, and another thread changes B back to A. The current thread then wrongly concludes that the value never changed, and the subsequent operation can go wrong.
Here we can borrow the optimistic-locking idea from databases: maintain a version number (or tag) alongside the value, and only apply the update when the expected value matches the value in memory and the tag has not changed. A sketch of this follows.
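One hedged way to express this in C++11 is to pack a version counter next to the value and compare-and-swap the pair as a whole. The struct and function below are hypothetical illustrations (and std::atomic<TaggedValue> may fall back to an internal lock on platforms without a 16-byte CAS):

#include <atomic>
#include <cstdint>

struct TaggedValue
{
    void    *ptr;       // the value we actually care about
    uint64_t version;   // bumped on every successful update
};

std::atomic<TaggedValue> slot { TaggedValue { nullptr, 0 } };

// Replace old_ptr with new_ptr only if neither the pointer nor the version
// changed since we read them; the version bump is what defeats ABA.
bool update (void *old_ptr, void *new_ptr)
{
    TaggedValue expected = slot.load ();
    if (expected.ptr != old_ptr)
        return false;
    TaggedValue desired { new_ptr, expected.version + 1 };
    return slot.compare_exchange_strong (expected, desired);
}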
****
Lock-free (CAS) code is not easy to write correctly; unless you have mastered it thoroughly, it is usually better to stick with locks.
CAS is more a way of thinking, and one route to high-performance code. There are already open-source lock-free libraries available for us to use, and they may well be the best choice.
#ifndef _ARRAYLOCKFREEQUEUEIMP_H___
#define _ARRAYLOCKFREEQUEUEIMP_H___
#include "ArrayLockFreeQueue.h"
#include <assert.h>
#include <sched.h>
#include "atom_opt.h"
template <typename ELEM_T, QUEUE_INT Q_SIZE>
ArrayLockFreeQueue<ELEM_T, Q_SIZE>::ArrayLockFreeQueue() :
m_writeIndex(0),
m_readIndex(0),
m_maximumReadIndex(0)
{
m_count = 0;
}
template <typename ELEM_T, QUEUE_INT Q_SIZE>
ArrayLockFreeQueue<ELEM_T, Q_SIZE>::~ArrayLockFreeQueue()
{
}
template <typename ELEM_T, QUEUE_INT Q_SIZE>
inline QUEUE_INT ArrayLockFreeQueue<ELEM_T, Q_SIZE>::countToIndex(QUEUE_INT a_count)
{
return (a_count % Q_SIZE);
}
template <typename ELEM_T, QUEUE_INT Q_SIZE>
QUEUE_INT ArrayLockFreeQueue<ELEM_T, Q_SIZE>::size()
{
QUEUE_INT currentWriteIndex = m_writeIndex;
QUEUE_INT currentReadIndex = m_readIndex;
if(currentWriteIndex>=currentReadIndex)
return currentWriteIndex - currentReadIndex;
else
return Q_SIZE + currentWriteIndex - currentReadIndex;
}
template <typename ELEM_T, QUEUE_INT Q_SIZE>
bool ArrayLockFreeQueue<ELEM_T, Q_SIZE>::enqueue(const ELEM_T &a_data)
{
QUEUE_INT currentWriteIndex; // snapshot of the write index
QUEUE_INT currentReadIndex;
do
{
currentWriteIndex = m_writeIndex;
currentReadIndex = m_readIndex;
if(countToIndex(currentWriteIndex + 1) ==
countToIndex(currentReadIndex))
{
return false; // the queue is full
}
} while(!CAS(&m_writeIndex, currentWriteIndex, (currentWriteIndex+1)));
// We know now that this index is reserved for us. Use it to save the data
m_thequeue[countToIndex(currentWriteIndex)] = a_data;
// update the maximum read index after saving the data. It wouldn't fail if there is only one thread
// inserting in the queue. It might fail if there are more than 1 producer threads because this
// operation has to be done in the same order as the previous CAS
while(!CAS(&m_maximumReadIndex, currentWriteIndex, (currentWriteIndex + 1)))
{
// this is a good place to yield the thread in case there are more
// software threads than hardware processors and you have more
// than 1 producer thread
// have a look at sched_yield (POSIX.1b)
sched_yield(); // if there are more software threads than CPU cores and we never yield, we could spin here indefinitely
}
AtomicAdd(&m_count, 1);
return true;
}
template <typename ELEM_T, QUEUE_INT Q_SIZE>
bool ArrayLockFreeQueue<ELEM_T, Q_SIZE>::try_dequeue(ELEM_T &a_data)
{
return dequeue(a_data);
}
template <typename ELEM_T, QUEUE_INT Q_SIZE>
bool ArrayLockFreeQueue<ELEM_T, Q_SIZE>::dequeue(ELEM_T &a_data)
{
QUEUE_INT currentMaximumReadIndex;
QUEUE_INT currentReadIndex;
do
{
// to ensure thread-safety when there is more than 1 producer thread
// a second index is defined (m_maximumReadIndex)
currentReadIndex = m_readIndex;
currentMaximumReadIndex = m_maximumReadIndex;
if(countToIndex(currentReadIndex) ==
countToIndex(currentMaximumReadIndex))
{
// the queue is empty or
// a producer thread has allocate space in the queue but is
// waiting to commit the data into it
return false;
}
// retrieve the data from the queue
a_data = m_thequeue[countToIndex(currentReadIndex)];
// try to perform now the CAS operation on the read index. If we succeed
// a_data already contains what m_readIndex pointed to before we
// increased it
if(CAS(&m_readIndex, currentReadIndex, (currentReadIndex + 1)))
{
AtomicSub(&m_count, 1); // we actually consumed an element
return true;
}
} while(true);
assert(0);
// Add this return statement to avoid compiler warnings
return false;
}
#endif
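A minimal usage sketch for the array-based queue above. It assumes that atom_opt.h maps CAS/AtomicAdd/AtomicSub onto the GCC __sync builtins and that ArrayLockFreeQueue.h declares the members used here; note that one slot of the array is always left unused so that "full" can be distinguished from "empty":

#include "ArrayLockFreeQueue.h"
#include <thread>
#include <cstdio>

int main ()
{
    ArrayLockFreeQueue<int, 1024> q;        // usable capacity is Q_SIZE - 1

    std::thread producer ([&] {
        for (int i = 0; i < 1000; ++i)
            while (!q.enqueue (i))          // enqueue fails while the queue is full
                std::this_thread::yield ();
    });

    std::thread consumer ([&] {
        int v, got = 0;
        while (got < 1000)
        {
            if (q.try_dequeue (v))
                ++got;
            else
                std::this_thread::yield ();
        }
        std::printf ("consumed %d items\n", got);
    });

    producer.join ();
    consumer.join ();
    return 0;
}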
#ifndef SIMPLE_LOCK_FREE_QUEUE_H
#define SIMPLE_LOCK_FREE_QUEUE_H
#include <atomic>
#include <cstdint>
#include <new>
#include <utility>
// Fairly simple, yet correct, implementation of a simple lock-free queue based on linked pointers with CAS
template <typename T>
class SimpleLockFreeQueue
{
public:
// Total maximum capacity: 2**39 (half a terabyte's worth -- off-by-one aligned indices)
static const int UBER_BLOCKS = 256;
static const int UBER_BLOCK_SIZE = 256;
static const int ULTRA_BLOCK_SIZE = 256;
static const int SUPER_BLOCK_SIZE = 256;
static const int BLOCK_SIZE = 128;
private:
static const uint64_t VERSION_MASK = 0xFFFFFF0000000000ULL;
static const uint64_t VERSION_INCR = 0x0000010000000000ULL;
static const uint64_t UBER_BLOCK_IDX_MASK = 0xFF00000000ULL;
static const uint64_t UBER_BLOCK_MASK = 0x00FF000000ULL;
static const uint64_t ULTRA_BLOCK_MASK = 0x0000FF0000ULL;
static const uint64_t SUPER_BLOCK_MASK = 0x000000FF00ULL;
static const uint64_t BLOCK_MASK = 0x00000000FEULL;
static const uint64_t UBER_BLOCK_IDX_SHIFT = 32;
static const uint64_t UBER_BLOCK_SHIFT = 24;
static const uint64_t ULTRA_BLOCK_SHIFT = 16;
static const uint64_t SUPER_BLOCK_SHIFT = 8;
static const uint64_t BLOCK_SHIFT = 1;
typedef std::uint64_t idx_t;
public:
SimpleLockFreeQueue()
: nextNodeIdx(2), freeListHead(0)
{
// Invariants: Head and tail are never null
auto initialNode = allocate_blank_node();
head.store(set_consumed_flag(initialNode), std::memory_order_relaxed);
tail.store(initialNode, std::memory_order_relaxed);
std::atomic_thread_fence(std::memory_order_seq_cst);
}
~SimpleLockFreeQueue()
{
std::atomic_thread_fence(std::memory_order_seq_cst);
idx_t idx = head.load(std::memory_order_relaxed);
if (is_consumed(idx))
{
idx = clear_consumed_flag(idx);
auto node = get_node_at(idx);
auto next = node->next.load(std::memory_order_relaxed);
node->~Node();
idx = next;
}
while (idx != 0)
{
auto node = get_node_at(idx);
auto next = node->next.load(std::memory_order_relaxed);
node->item()->~T();
node->~Node();
idx = next;
}
idx = freeListHead.load(std::memory_order_relaxed);
while (idx != 0)
{
auto node = get_node_at(idx);
auto next = node->next.load(std::memory_order_relaxed);
node->~Node();
idx = next;
}
}
template <typename U>
inline bool enqueue(U &&item)
{
idx_t nodeIdx = allocate_node_for(std::forward<U>(item));
auto tail_ = tail.load(std::memory_order_relaxed);
while (!tail.compare_exchange_weak(tail_, nodeIdx, std::memory_order_release, std::memory_order_relaxed))
continue;
get_node_at(tail_)->next.store(nodeIdx, std::memory_order_release);
return true;
}
inline bool try_dequeue(T &item)
{
while (true)
{
auto rawHead_ = head.load(std::memory_order_acquire);
auto head_ = clear_consumed_flag(rawHead_);
auto headNode = get_node_at(head_);
auto next = headNode->next.load(std::memory_order_relaxed);
if (next == 0)
{
// Can't move head (that would make head null), but can try to dequeue the node at head anyway
if (is_consumed(rawHead_))
{
return false;
}
if (head.compare_exchange_strong(head_, set_consumed_flag(head_), std::memory_order_release, std::memory_order_relaxed))
{
// Whee, we own the right to dequeue this item
item = std::move(*headNode->item());
headNode->item()->~T();
return true;
}
}
else
{
// Remove node whether it's already been consumed or not; if it hasn't been consumed, consume it!
// head_->next can't possibly change, since once it's not null nobody writes to it (and ABA is avoided with versioning)
if (head.compare_exchange_weak(rawHead_, next, std::memory_order_acq_rel, std::memory_order_relaxed))
{
// Aha, we successfully moved the head. But does it have anything in it?
if (!is_consumed(rawHead_))
{
item = std::move(*headNode->item());
headNode->item()->~T();
}
add_node_to_free_list(head_, headNode);
if (!is_consumed(rawHead_))
{
return true;
}
}
}
}
}
private:
struct Node
{
std::atomic<idx_t> next;
alignas(T) char rawItem[sizeof(T)];
template <typename U>
Node(U &&item)
: next(0)
{
new (this->item()) T(std::forward<U>(item));
}
Node()
: next(0)
{
}
inline T *item() { return reinterpret_cast<T *>(rawItem); }
};
struct Block
{
alignas(Node) char nodes[sizeof(Node) * BLOCK_SIZE];
inline char *node_pos(idx_t idx) { return nodes + ((idx & BLOCK_MASK) >> BLOCK_SHIFT) * sizeof(Node); }
};
template <typename TSubBlock, int BlockSize>
struct HigherOrderBlock
{
std::atomic<TSubBlock *> subblocks[BlockSize];
HigherOrderBlock()
{
for (int i = 0; i != BlockSize; ++i)
{
subblocks[i].store(nullptr, std::memory_order_release);
}
}
~HigherOrderBlock()
{
for (int i = 0; i != BlockSize; ++i)
{
if (subblocks[i].load(std::memory_order_relaxed) != nullptr)
{
delete subblocks[i].load(std::memory_order_relaxed);
}
}
}
};
typedef HigherOrderBlock<Block, SUPER_BLOCK_SIZE> SuperBlock;
typedef HigherOrderBlock<SuperBlock, ULTRA_BLOCK_SIZE> UltraBlock;
typedef HigherOrderBlock<UltraBlock, UBER_BLOCK_SIZE> UberBlock;
typedef HigherOrderBlock<UberBlock, UBER_BLOCKS> UberBlockContainer;
private:
inline idx_t set_consumed_flag(idx_t idx)
{
return idx | (idx_t)1;
}
inline idx_t clear_consumed_flag(idx_t idx)
{
return idx & ~(idx_t)1;
}
inline bool is_consumed(idx_t idx)
{
return (idx & 1) != 0;
}
inline void add_node_to_free_list(idx_t idx, Node *node)
{
auto head = freeListHead.load(std::memory_order_relaxed);
do
{
node->next.store(head, std::memory_order_relaxed);
} while (!freeListHead.compare_exchange_weak(head, idx, std::memory_order_release, std::memory_order_relaxed));
}
inline idx_t try_get_node_from_free_list()
{
auto head = freeListHead.load(std::memory_order_acquire);
while (head != 0 && !freeListHead.compare_exchange_weak(head, get_node_at(head)->next.load(std::memory_order_relaxed), std::memory_order_acquire, std::memory_order_relaxed))
{
continue;
}
if (head != 0)
{
// Increment version
head = (head & ~VERSION_MASK) | ((head + VERSION_INCR) & VERSION_MASK);
}
return head;
}
inline Node *get_node_at(idx_t idx)
{
auto uberBlock = uberBlockContainer.subblocks[(idx & UBER_BLOCK_IDX_MASK) >> UBER_BLOCK_IDX_SHIFT].load(std::memory_order_relaxed);
auto ultraBlock = uberBlock->subblocks[(idx & UBER_BLOCK_MASK) >> UBER_BLOCK_SHIFT].load(std::memory_order_relaxed);
auto superBlock = ultraBlock->subblocks[(idx & ULTRA_BLOCK_MASK) >> ULTRA_BLOCK_SHIFT].load(std::memory_order_relaxed);
auto block = superBlock->subblocks[(idx & SUPER_BLOCK_MASK) >> SUPER_BLOCK_SHIFT].load(std::memory_order_relaxed);
return reinterpret_cast<Node *>(block->node_pos(idx));
}
template <typename U>
inline idx_t allocate_node_for(U &&item)
{
auto idx = try_get_node_from_free_list();
if (idx != 0)
{
auto node = get_node_at(idx);
node->next.store(0, std::memory_order_relaxed);
new (node->item()) T(std::forward<U>(item));
return idx;
}
new (new_node_address(idx)) Node(std::forward<U>(item));
return idx;
}
inline idx_t allocate_blank_node()
{
idx_t idx;
new (new_node_address(idx)) Node();
return idx;
}
inline char *new_node_address(idx_t &idx)
{
idx = nextNodeIdx.fetch_add(static_cast<idx_t>(1) << BLOCK_SHIFT, std::memory_order_relaxed);
std::size_t uberBlockContainerIdx = (idx & UBER_BLOCK_IDX_MASK) >> UBER_BLOCK_IDX_SHIFT;
std::size_t uberBlockIdx = (idx & UBER_BLOCK_MASK) >> UBER_BLOCK_SHIFT;
std::size_t ultraBlockIdx = (idx & ULTRA_BLOCK_MASK) >> ULTRA_BLOCK_SHIFT;
std::size_t superBlockIdx = (idx & SUPER_BLOCK_MASK) >> SUPER_BLOCK_SHIFT;
auto uberBlock = lookup_subblock<UberBlockContainer, UberBlock>(&uberBlockContainer, uberBlockContainerIdx);
auto ultraBlock = lookup_subblock<UberBlock, UltraBlock>(uberBlock, uberBlockIdx);
auto superBlock = lookup_subblock<UltraBlock, SuperBlock>(ultraBlock, ultraBlockIdx);
auto block = lookup_subblock<SuperBlock, Block>(superBlock, superBlockIdx);
return block->node_pos(idx);
}
template <typename TBlock, typename TSubBlock>
inline TSubBlock *lookup_subblock(TBlock *block, std::size_t idx)
{
auto ptr = block->subblocks[idx].load(std::memory_order_acquire);
if (ptr == nullptr)
{
auto newBlock = new TSubBlock();
if (!block->subblocks[idx].compare_exchange_strong(ptr, newBlock, std::memory_order_release, std::memory_order_acquire))
{
delete newBlock;
}
else
{
ptr = newBlock;
}
}
return ptr;
}
private:
std::atomic<idx_t> nextNodeIdx;
std::atomic<idx_t> head; // index of the front node (bit 0 doubles as the consumed flag)
std::atomic<idx_t> tail; // index of the last node
std::atomic<idx_t> freeListHead;
UberBlockContainer uberBlockContainer;
};
#endif
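Finally, a minimal usage sketch for SimpleLockFreeQueue. It is single-threaded just to show the enqueue/try_dequeue API; concurrent use goes through the same two calls:

#include <iostream>
#include <string>

int main ()
{
    SimpleLockFreeQueue<std::string> q;

    q.enqueue (std::string ("hello"));
    q.enqueue (std::string ("world"));

    std::string s;
    while (q.try_dequeue (s))
        std::cout << s << '\n';

    return 0;
}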