Implement most-significant-digit-first string sort

namespace pratique
{
    namespace details
    {
        // Ranges with at most this many strings are finished with insertion
        // sort instead of another counting pass.
        const size_t CUTOFF = 15;

        // Number of distinct byte values a string position can hold.
        const size_t RADIX = 256;

        typedef std::vector<const char *> string_refs;
        typedef string_refs::iterator string_iterator;
        typedef string_refs::difference_type string_offset;

        // Three-way comparison of two NUL-terminated strings, ignoring their
        // first `d` characters.
        //
        // Both strings must be at least `d` characters long. Characters are
        // compared as unsigned bytes so that the ordering matches strcmp and
        // the byte-indexed buckets used by msd_sort; comparing plain `char`
        // would put bytes >= 0x80 before ASCII wherever `char` is signed.
        //
        // Returns -1 if s1 sorts first, 1 if s2 sorts first, 0 if equal.
        int compare(const char * s1, const char * s2, size_t d)
        {
            s1 += d;
            s2 += d;
            for (;; ++s1, ++s2) {
                const unsigned char c1 = static_cast<unsigned char>(*s1);
                const unsigned char c2 = static_cast<unsigned char>(*s2);
                if (c1 != c2) {
                    // A terminator (0) is smaller than any other byte, so a
                    // string that is a prefix of the other sorts first.
                    return c1 < c2 ? -1 : 1;
                }
                if (c1 == 0) {
                    return 0;
                }
            }
        }

        // Insertion-sorts the `n` strings starting at `begin`, comparing them
        // from character offset `d` onwards.
        //
        // `end` is not consulted; every caller in this file passes
        // end == begin + n. All strings must be at least `d` characters long.
        void insertion_sort(std::vector<const char *>::iterator begin, std::vector<const char *>::iterator end, size_t n, size_t d)
        {
            (void)end;
            if (n < 2) {
                return; // nothing to order, and avoids forming begin + n - 1 for n == 0
            }
            const string_iterator stop = begin + static_cast<string_offset>(n);
            for (string_iterator cur = begin + 1; cur != stop; ++cur) {
                const char * const value = *cur;
                string_iterator hole = cur;
                // Shift larger elements one slot right until `value` fits.
                while (hole != begin && compare(*(hole - 1), value, d) > 0) {
                    *hole = *(hole - 1);
                    --hole;
                }
                *hole = value;
            }
        }

        // Sorts [begin, end) by the characters at offset `d` and beyond.
        //
        // All strings in the range must be at least `d` characters long.
        // `buffer` is scratch space and must hold at least (end - begin)
        // elements; its contents are overwritten.
        void msd_sort(std::vector<const char *>::iterator begin, std::vector<const char *>::iterator end, size_t d, std::vector<const char *> & buffer)
        {
            const size_t n = static_cast<size_t>(end - begin);
            if (n <= CUTOFF) {
                insertion_sort(begin, end, n, d);
                return;
            }

            // Count how often each byte value occurs at offset d. Counts are
            // stored one slot to the right so the prefix sums below yield
            // bucket start positions directly.
            size_t bound[RADIX + 1] = { 0 };
            for (string_iterator it = begin; it != end; ++it) {
                ++bound[static_cast<unsigned char>((*it)[d]) + 1];
            }
            for (size_t c = 0; c < RADIX; ++c) {
                bound[c + 1] += bound[c];
            }

            // Here bound[c] is the first slot of bucket c. Distributing
            // front-to-back keeps the pass stable and advances bound[c] to
            // one past the last slot of bucket c.
            for (string_iterator it = begin; it != end; ++it) {
                const size_t c = static_cast<unsigned char>((*it)[d]);
                buffer[bound[c]++] = *it;
            }
            std::copy(buffer.begin(), buffer.begin() + static_cast<string_offset>(n), begin);

            // Bucket c now spans [bound[c - 1], bound[c]). Bucket 0 holds the
            // strings that end at offset d; they are all equal and must not
            // be read past their terminator, so start from bucket 1.
            for (size_t c = 1; c < RADIX; ++c) {
                const size_t lo = bound[c - 1];
                const size_t count = bound[c] - lo;
                if (count < 2) {
                    continue;
                }
                const string_iterator first = begin + static_cast<string_offset>(lo);
                const string_iterator last = first + static_cast<string_offset>(count);
                if (count > CUTOFF) {
                    msd_sort(first, last, d + 1, buffer);
                }
                else {
                    insertion_sort(first, last, count, d + 1);
                }
            }
        }
    }

    // Most-significant-digit-first radix sort of `ss`.
    //
    // Returns the c_str() pointers of the input strings in ascending byte
    // order (the order strcmp defines). The pointers refer into `ss`, so the
    // result is only valid while `ss` is alive and unmodified. Strings are
    // treated as NUL-terminated: anything after an embedded '\0' is ignored.
    std::vector<const char *> msd_sort(std::vector<std::string> const & ss)
    {
        const size_t n = ss.size();

        std::vector<const char *> ret(n, nullptr);
        for (size_t i = 0; i < n; ++i) {
            ret[i] = ss[i].c_str();
        }

        if (n > 1) {
            // Scratch space shared by every level of the recursion.
            std::vector<const char *> buffer(n, nullptr);
            details::msd_sort(ret.begin(), ret.end(), 0, buffer);
        }
        return ret;
    }
}


The testing code is:

    // Reference implementation used to check msd_sort: returns the c_str()
    // pointers of `ss` ordered with std::sort and strcmp.
    //
    // The returned pointers refer into `ss` and are only valid while `ss` is
    // alive and unmodified.
    std::vector<const char *> standard_sort(std::vector<std::string> const & ss)
    {
        std::vector<const char *> ret;
        ret.reserve(ss.size());
        for (auto const & s : ss) {
            ret.push_back(s.c_str());
        }

        std::sort(ret.begin(), ret.end(), [](const char * s1, const char * s2) { return strcmp(s1, s2) < 0; });
        // Return the local by name so copy elision / implicit move applies.
        return ret;
    }

    // Returns true when both vectors have the same length and every pair of
    // strings at the same index compares equal with strcmp.
    //
    // On the first differing pair, prints its 1-based position and both
    // strings to stdout and returns false. A size mismatch returns false
    // without printing anything.
    bool equals(std::vector<const char *> const & vs1, std::vector<const char *> const & vs2)
    {
        if (vs1.size() != vs2.size()) {
            return false;
        }
        for (size_t i = 0, e = vs1.size(); i < e; ++i) {
            if (strcmp(vs1[i], vs2[i]) != 0) {
                // %zu is the conversion for size_t; %d with a size_t argument is undefined behaviour.
                printf( "The %zuth elements are different: %s %s\n", i + 1, vs1[ i ], vs2[ i ] );
                return false;
            }
        }
        return true;
    }


void msd_sort_test()
{
    printf("\nMSD sort test cases are being executed......\n");
    {
        std::vector input;
        for (size_t i = 0; i < 127; ++i) {
            input.emplace_back( std::string( "" ) );
        }
        for ( size_t i = 0; i < 127; ++i ) {
            input.emplace_back( std::string( "A" ) );
        }
        msd_sort( input );
    }

    {
        std::vector ss = { "she", "by", "the", "sea", "shells", "are", "seashells", "surely", "she", "sea", "short", "sells", "about", "method", "divide", "way", "key", "after", "few", "basic", "idea", "static", "final", "she", "by", "the", "sea", "shells", "are", "seashells", "surely", "she", "sea", "short", "sells", "about", "method", "divide", "way", "key", "see", "shy", "swing", "short", "shore", "shop", "sheep"};
        auto r1 = msd_sort(ss);
        auto r2 = standard_sort(ss);
        verify(equals(r1, r2), stderr, "The implementation of msd_sort is wrong\n");
    }

    for ( size_t i = 0, e = number_of_primes; i < e; ++i ) {
        size_t string_length_limit = 16;
        std::vector buffer(string_length_limit + 1, '\0');
        size_t items = global_primes[i] / string_length_limit;
        std::vector input;
        input.reserve(items);
        for (size_t j = 0; j < items; ++j) {
            size_t ke = pratique::random(static_cast(0), string_length_limit);
            for (size_t k = 0; k < ke; ++k) {
                buffer[k] = pratique::random('A', 'z');
            }
            buffer[ke] = '\0';
            input.emplace_back(std::string(&buffer[0]));
        }

        auto r1 = report_performance("MSD sort", msd_sort, input);
        auto r2 = report_performance("standard sort", standard_sort, input);

        verify(equals(r1, r2), stderr, "The implementation of msd_sort is wrong\n");
    }
}


你可能感兴趣的:(Implement most-significant-digit-first string sort)