
  摘要: 工作已经有一段时间了,总的感觉是,实际工作中用到的C++内容对提升自己的C++水平没有太大帮助,基本都是搭积木做需求。前一段时间主要以看书为主,但是总感觉缺点什么,因此打算通读一遍folly库的实现来提升自己的C++水平。

1 FBString简介

  fbstring is a drop-in replacement for std::string. The main benefit of fbstring is significantly increased performance on virtually all important primitives. This is achieved by using a three-tiered storage strategy and by cooperating with the memory allocator. In particular, fbstring is designed to detect use of jemalloc and cooperate with it to achieve significant improvements in speed and memory usage.

  fbstring supports 32- and 64-bit and little- and big-endian architectures.



  • 三种存储策略;
  • 与 std::string 100% 兼容。
  • COW 存储时对于引用计数线程安全。
  • 对 Jemalloc 友好。如果检测到使用jemalloc,那么将使用jemalloc的一些非标准扩展接口来提高性能。
  • find()使用简化版的Boyer-Moore algorithm。在查找成功的情况下,相对于string::find()有 30 倍的性能提升。在查找失败的情况下也有 1.5 倍的性能提升。
  • 可以与std::string互相转换。

2 FBString的实现

2.1 存储策略


  • SSO:小字符串(小于等于23个字符)直接存储在对象内部,不额外分配堆内存;
  • Eager Copy:中等长度字符串(大于23个字符,小于等于255个字符)总是使用堆内存并且总是拷贝,行为类似std::string;
  • COW:长字符串(大于255个字符)使用引用计数和COW(写时复制)技术避免不必要的拷贝操作。
struct MediumLarge {
  Char* data_;
  size_t size_;
  size_t capacity_;

  size_t capacity() const {
    return kIsLittleEndian ? capacity_ & capacityExtractMask : capacity_ >> 2;

  void setCapacity(size_t cap, Category cat) {
    capacity_ = kIsLittleEndian
        ? cap | (static_cast<size_t>(cat) << kCategoryShift)
        : (cap << 2) | static_cast<size_t>(cat);

union {
    uint8_t bytes_[sizeof(MediumLarge)]; // For accessing the last byte.
    Char small_[sizeof(MediumLarge) / sizeof(Char)];
    MediumLarge ml_;


// True when the compiler-provided byte-order macros report a little-endian target.
constexpr auto kIsLittleEndian = __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__;
// Index of the last byte of the MediumLarge-sized storage.
constexpr static size_t lastChar = sizeof(MediumLarge) - 1;
// lastChar expressed in Char units; used as the small-string size limit.
constexpr static size_t maxSmallSize = lastChar / sizeof(Char);
// Bits of the last byte that carry the category tag: the two high bits on
// little-endian targets, the two low bits otherwise.
constexpr static uint8_t categoryExtractMask = kIsLittleEndian ? 0xC0 : 0x3;
// Returns the length of a small (in-object) string.
// The last element of small_ holds (maxSmallSize - size); when the target is
// not little-endian that value sits above the two low bits, so it is shifted
// down by 2 before use.
size_t smallSize() const {
  assert(category() == Category::isSmall);
  constexpr auto shift = kIsLittleEndian ? 0 : 2;
  auto smallShifted = static_cast<size_t>(small_[maxSmallSize]) >> shift;
  assert(static_cast<size_t>(maxSmallSize) >= smallShifted);
  return static_cast<size_t>(maxSmallSize) - smallShifted;
}

// Reads the storage category from the tag bits of the last byte.
Category category() const {
  // works for both big-endian and little-endian
  return static_cast<Category>(bytes_[lastChar] & categoryExtractMask);
}


// Storage category tag. The enumerator values are the bit patterns selected
// by categoryExtractMask: the two high bits of the last byte on little-endian
// targets, the two low bits otherwise.
typedef uint8_t category_type;
enum class Category : category_type {
  isSmall = 0,
  isMedium = kIsLittleEndian ? 0x80 : 0x2,
  isLarge = kIsLittleEndian ? 0x40 : 0x1,
};

Eager Copy 与 COW 两种存储方式共用上面的 MediumLarge 结构,通过最后一个字节中的类别标志位(category)区分。

2.2 构造和拷贝


  // Builds the core from `size` characters starting at `data`, choosing the
  // storage strategy by length: small (unless disableSSO is set), then
  // medium, then large.
  fbstring_core(
      const Char* const data,
      const size_t size,
      bool disableSSO = FBSTRING_DISABLE_SSO) {
    if (!disableSSO && size <= maxSmallSize) {
      initSmall(data, size);
    } else if (size <= maxMediumSize) {
      initMedium(data, size);
    } else {
      initLarge(data, size);
    }
    // Whatever strategy was chosen, the result must mirror the input.
    assert(this->size() == size);
    assert(size == 0 || memcmp(this->data(), data, size * sizeof(Char)) == 0);
  }


Eager Copy
  Eager Copy 在创建时直接分配一块内存,并将原字符串的内容拷贝到新申请的内存上。

// Copies a medium string from rhs by allocating a fresh buffer and
// duplicating the characters, terminator included.
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::copyMedium(const fbstring_core& rhs) {
  // Medium strings are copied eagerly. Don't forget to allocate
  // one extra Char for the null terminator.
  auto const allocSize = goodMallocSize((1 + rhs.ml_.size_) * sizeof(Char));
  ml_.data_ = static_cast<Char*>(checkedMalloc(allocSize));
  // Also copies terminator.
  fbstring_detail::podCopy(
      rhs.ml_.data_, rhs.ml_.data_ + rhs.ml_.size_ + 1, ml_.data_);
  ml_.size_ = rhs.ml_.size_;
  // The allocator may have rounded the request up; record the real capacity
  // (in Chars, excluding the terminator slot).
  ml_.setCapacity(allocSize / sizeof(Char) - 1, Category::isMedium);
  assert(category() == Category::isMedium);
}


// Initializes a large string: the characters live in a RefCounted block
// created from `data`, and the string is tagged as Category::isLarge.
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::initLarge(
    const Char* const data, const size_t size) {
  // Large strings are allocated differently
  size_t effectiveCapacity = size;
  // create() updates effectiveCapacity to the capacity actually obtained.
  auto const newRC = RefCounted::create(data, &effectiveCapacity);
  ml_.data_ = newRC->data_;
  ml_.size_ = size;
  ml_.setCapacity(effectiveCapacity, Category::isLarge);
  ml_.data_[size] = '\0';
}


  // Header placed in front of the characters of a large string: a reference
  // count followed by the first character of the buffer.
  // Helpers used elsewhere in this article (getDataOffset(), refs(), ...) are
  // not shown in this excerpt.
  struct RefCounted {
    std::atomic<size_t> refCount_;
    Char data_[1];

    // Allocates a block able to hold *size characters plus a terminator,
    // sets its reference count to 1, and writes the capacity actually
    // obtained back through `size`.
    static RefCounted* create(size_t* size) {
      size_t capacityBytes;
      // Room for the terminator; guard against overflow.
      if (!folly::checked_add(&capacityBytes, *size, size_t(1))) {
        // NOTE(review): the statement was lost from this excerpt; upstream
        // folly reports std::length_error here - confirm.
        throw std::length_error("");
      }
      // Convert to bytes and add the header in front of data_.
      if (!folly::checked_muladd(
              &capacityBytes, capacityBytes, sizeof(Char), getDataOffset())) {
        throw std::length_error("");
      }
      const size_t allocSize = goodMallocSize(capacityBytes);
      auto result = static_cast<RefCounted*>(checkedMalloc(allocSize));
      // A freshly created block has exactly one owner.
      result->refCount_.store(1, std::memory_order_release);
      *size = (allocSize - getDataOffset()) / sizeof(Char) - 1;
      return result;
    }
  };


// Copies a large string from rhs by sharing its buffer instead of
// duplicating the characters.
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::copyLarge(const fbstring_core& rhs) {
  // Large strings are just refcounted
  ml_ = rhs.ml_;
  // Both objects now point at the same buffer, so it gains one owner.
  // NOTE(review): this call was missing from the excerpt; restored to match
  // upstream folly - confirm.
  RefCounted::incrementRefs(ml_.data_);
  assert(category() == Category::isLarge && size() == rhs.size());
}


// Returns a writable pointer to the characters of a large string, first
// making sure the buffer is not shared with another string.
template <class Char>
inline Char* fbstring_core<Char>::mutableDataLarge() {
  assert(category() == Category::isLarge);
  if (RefCounted::refs(ml_.data_) > 1) { // Ensure unique.
    // NOTE(review): the body of this branch was lost from the excerpt;
    // upstream folly calls unshare() here - confirm.
    unshare();
  }
  return ml_.data_;
}

// Gives this large string a private copy of its characters with room for at
// least minCapacity characters.
template <class Char>
FOLLY_NOINLINE void fbstring_core<Char>::unshare(size_t minCapacity) {
  assert(category() == Category::isLarge);
  size_t effectiveCapacity = std::max(minCapacity, ml_.capacity());
  auto const newRC = RefCounted::create(&effectiveCapacity);
  // If this fails, someone placed the wrong capacity in an
  // fbstring.
  assert(effectiveCapacity >= ml_.capacity());
  // Also copies terminator.
  fbstring_detail::podCopy(ml_.data_, ml_.data_ + ml_.size_ + 1, newRC->data_);
  // Give up our reference to the previously shared buffer before switching.
  // NOTE(review): this call was missing from the excerpt; restored to match
  // upstream folly - confirm.
  RefCounted::decrementRefs(ml_.data_);
  ml_.data_ = newRC->data_;
  ml_.setCapacity(effectiveCapacity, Category::isLarge);
  // size_ remains unchanged.
}

3. 一些优化的细节

3.1 快速拷贝


// Small strings are bitblitted: the characters are copied straight into the
// object's own storage, one machine word at a time when `data` is aligned.
template <class Char>
inline void fbstring_core<Char>::initSmall(
    const Char* const data, const size_t size) {
// If data is aligned, use fast word-wise copying. Otherwise,
// use conservative memcpy.
// The word-wise path reads bytes which are outside the range of
// the string, and makes ASan unhappy, so we disable it when
// compiling with ASan.
#ifndef FOLLY_SANITIZE_ADDRESS
  if ((reinterpret_cast<size_t>(data) & (sizeof(size_t) - 1)) == 0) {
    const size_t byteSize = size * sizeof(Char);
    constexpr size_t wordWidth = sizeof(size_t);
    // Each case copies one word and deliberately falls through to the next,
    // so N words are copied for a string that spans N words.
    switch ((byteSize + wordWidth - 1) / wordWidth) { // Number of words.
      case 3:
        ml_.capacity_ = reinterpret_cast<const size_t*>(data)[2];
        // fall through
      case 2:
        ml_.size_ = reinterpret_cast<const size_t*>(data)[1];
        // fall through
      case 1:
        ml_.data_ = *reinterpret_cast<Char**>(const_cast<Char*>(data));
        // fall through
      case 0:
        break;
    }
  } else
#endif
  {
    if (size != 0) {
      fbstring_detail::podCopy(data, data + size, small_);
    }
  }
  // Record the length (and terminator) in the in-object buffer.
  // NOTE(review): this call and the ASan guard above were missing from the
  // excerpt; restored to match upstream folly - confirm.
  setSmallSize(size);
}

3.2 循环展开


// Fills the range [b, e) with the value c.
// When T is one byte wide a single memset is used; otherwise the range is
// written by an 8-way unrolled loop followed by a loop for the remainder.
template <class Pod, class T>
inline void podFill(Pod* b, Pod* e, T c) {
  assert(b && e && b <= e);
  constexpr auto kUseMemset = sizeof(T) == 1;
  if /* constexpr */ (kUseMemset) {
    memset(b, c, size_t(e - b));
  } else {
    // ee marks the end of the part whose length is a multiple of 8.
    auto const ee = b + ((e - b) & ~7u);
    for (; b != ee; b += 8) {
      b[0] = c;
      b[1] = c;
      b[2] = c;
      b[3] = c;
      b[4] = c;
      b[5] = c;
      b[6] = c;
      b[7] = c;
    }
    // Leftovers
    for (; b != e; ++b) {
      *b = c;
    }
  }
}

3.3 __builtin_expect


// Branch-prediction hints built on the compiler's __builtin_expect:
// `exp` is converted to bool and `c` is the value it is expected to have.
#define FOLLY_BUILTIN_EXPECT(exp, c) __builtin_expect(static_cast<bool>(exp), c)
// Convenience wrappers: the condition is expected to be true / false.
#define FOLLY_LIKELY(x) FOLLY_BUILTIN_EXPECT((x), 1)
#define FOLLY_UNLIKELY(x) FOLLY_BUILTIN_EXPECT((x), 0)
// Creates a RefCounted block via create(size) and copies the first *size
// (as passed in) characters of `data` into it.
static RefCounted* create(const Char* data, size_t* size) {
  // Remember the requested length: create(size) overwrites *size.
  const size_t effectiveSize = *size;
  auto result = create(size);
  // Hint to the compiler that a non-empty source is the common case.
  if (FOLLY_LIKELY(effectiveSize > 0)) {
    fbstring_detail::podCopy(data, data + effectiveSize, result->data_);
  }
  return result;
}

3.4 memory_order

  对 std::atomic<size_t> refCount_ 进行原子操作时使用的 C++ memory order:

  • store,设置引用数为 1 : std::memory_order_release
  • load,获取当前共享字符串的引用数: std::memory_order_acquire
  • add/sub,增加/减少一个引用 : std::memory_order_acq_rel

3.5 malloc or realloc


    void* p,
    const size_t currentSize,
    const size_t currentCapacity,
    const size_t newCapacity) {
  assert(currentSize <= currentCapacity && currentCapacity < newCapacity);

  auto const slack = currentCapacity - currentSize;
  if (slack * 2 > currentSize) {
    // Too much slack, malloc-copy-free cycle:
    auto const result = checkedMalloc(newCapacity);
    std::memcpy(result, p, currentSize);
    return result;
  // If there's not too much slack, we realloc in hope of coalescing
  return checkedRealloc(p, newCapacity);

