本次来聊一聊isa-l,它是ceph的纠删码插件之一(ceph默认使用的纠删码插件曾经是jerasure)。
关于纠删码的原理其实就像是线性代数中解线性方程组。这部分就不赘述。本次重点关注isa-l中的两个函数。
本次主要关心的问题:
在已知旧的校验块、旧的数据块和修改后的数据块的情况下,如何快速得到新的校验块。
isa-l库可以独立编译和运行,先从
https://github.com/ceph/isa-l下载对应的代码。
后面需要使用到
isa-l/erasure_code/erasure_code_update_test.c
首先进行安装
make
./autogen.sh
./configure
make
运行make check,后续修改测试代码后也需要make check进行编译
PASS: erasure_code/gf_vect_mul_test
PASS: erasure_code/erasure_code_test
PASS: erasure_code/gf_inverse_test
PASS: erasure_code/erasure_code_update_test
PASS: raid/xor_gen_test
PASS: raid/pq_gen_test
PASS: raid/xor_check_test
PASS: raid/pq_check_test
PASS: crc/crc16_t10dif_test
PASS: crc/crc32_ieee_test
PASS: crc/crc32_iscsi_test
PASS: crc/crc64_funcs_test
PASS: igzip/igzip_rand_test
============================================================================
Testsuite summary for libisal 2.18.0
============================================================================
# TOTAL: 13
# PASS: 13
# SKIP: 0
# XFAIL: 0
# FAIL: 0
# XPASS: 0
# ERROR: 0
============================================================================
在make check后可以到对应的目录下直接运行对应的可执行文件
ysydeMacBook-Pro:erasure_code ysy$ ./erasure_code_update_test
test ec_encode_data_update: 127x8192
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
..........................................................................
.............done EC tests: Pass
2、优化思路
在isa-l中提供了两种计算校验块的方法:一是直接调用ec_encode_data,由全部数据块一次性计算出校验块;二是对每个数据块分别调用ec_encode_data_update,把该数据块的贡献逐个累加(异或)到校验块上
/*
 * Compute `rows` output (coding) buffers from `k` input (data) buffers of
 * `len` bytes each, using the lookup tables in gftbls. In the base routine
 * shown later in this article, every output byte is overwritten with the XOR
 * of the gf_mul() products of the corresponding input bytes.
 */
void ec_encode_data(int len, int k, int rows,
unsigned char *gftbls,unsigned char **data,
unsigned char **coding);
/*
 * Fold the contribution of ONE input buffer (`data`, which is source number
 * vec_i out of k) into the current contents of the `rows` coding buffers.
 * In the base routine shown later, each coding byte is read, XORed with a
 * single gf_mul() product and written back, so the coding buffers must hold
 * meaningful values (e.g. zeros or an existing parity) before the call.
 */
void ec_encode_data_update(int len, int k, int rows, int vec_i,
unsigned char *g_tbls,unsigned char *data,
unsigned char **coding);
上述的两个函数分别调用了对应的base函数
/*
 * Full encode entry point as shown in this article: forwards every argument
 * unchanged to ec_encode_data_base().
 *
 * len   - number of bytes in each buffer
 * srcs  - number of source buffers in src
 * dests - number of destination buffers in dest
 * v     - coefficient lookup tables
 */
void ec_encode_data(int len, int srcs, int dests, unsigned char *v,
unsigned char **src, unsigned char **dest)
{
ec_encode_data_base(len, srcs, dests, v, src, dest);
}
/*
 * Incremental encode entry point as shown in this article: forwards every
 * argument unchanged to ec_encode_data_update_base().
 *
 * len   - number of bytes in each buffer
 * k     - total number of source buffers the tables were built for
 * rows  - number of destination buffers in dest
 * vec_i - index of the source buffer that `data` represents
 * v     - coefficient lookup tables
 */
void ec_encode_data_update(int len, int k, int rows, int vec_i,
unsigned char *v,unsigned char *data,
unsigned char **dest)
{
ec_encode_data_update_base(len, k, rows, vec_i, v, data, dest);
}
在base中能找到一些思路
/*
 * Byte-at-a-time reference encoder.
 *
 * For every destination buffer and every byte position, the output byte is
 * the XOR over all sources of gf_mul(source byte, table byte). The previous
 * contents of dest are overwritten.
 *
 * len   - number of bytes in each buffer
 * srcs  - number of source buffers
 * dests - number of destination buffers
 * v     - lookup tables; each (destination, source) pair owns 32 bytes and
 *         only the byte at offset 1 of each group is read here (presumably
 *         the raw coefficient as laid out by ec_init_tables - confirm)
 * src   - array of `srcs` source buffers
 * dest  - array of `dests` destination buffers
 */
void ec_encode_data_base(int len, int srcs, int dests,
			 unsigned char *v, unsigned char **src,
			 unsigned char **dest)
{
	int out_row, pos, in_col;

	for (out_row = 0; out_row < dests; out_row++) {
		/* Offset of the first 32-byte table belonging to this output row. */
		int row_base = out_row * srcs * 32;

		for (pos = 0; pos < len; pos++) {
			unsigned char acc = 0;

			for (in_col = 0; in_col < srcs; in_col++)
				acc ^= gf_mul(src[in_col][pos], v[in_col * 32 + row_base + 1]);

			dest[out_row][pos] = acc;
		}
	}
}
/*
 * Byte-at-a-time reference implementation of the incremental encoder.
 *
 * For every destination buffer and byte position, XORs
 * gf_mul(data byte, table byte for source vec_i) into the byte already
 * stored in dest. Nothing is cleared first, so the result depends on the
 * previous contents of dest.
 *
 * len   - number of bytes in each buffer
 * k     - number of sources the tables in v were laid out for
 * rows  - number of destination buffers
 * vec_i - index of the source that `data` represents
 * v     - lookup tables, 32 bytes per (row, source) pair; only the byte at
 *         offset 1 of the selected group is read
 * data  - the single source buffer to fold in
 * dest  - array of `rows` destination buffers, updated in place
 */
void ec_encode_data_update_base(int len, int k, int rows,
				int vec_i, unsigned char *v,
				unsigned char *data,
				unsigned char **dest)
{
	int out_row, pos;

	for (out_row = 0; out_row < rows; out_row++) {
		/* Table byte for (this row, source vec_i); constant over the buffer. */
		int coef_idx = vec_i * 32 + out_row * k * 32 + 1;

		for (pos = 0; pos < len; pos++) {
			unsigned char prev = dest[out_row][pos];

			dest[out_row][pos] = prev ^ gf_mul(data[pos], v[coef_idx]);
		}
	}
}
在base函数中看到在其中使用了^(异或运算)
而异或运算有以下性质
(1)P ⊕ P = 0 任何数跟自身异或都等于0
(2)P ⊕ 0 = P 任何数和0异或都等于本身
(3)(A ⊕ B) ⊕ C = A ⊕(B ⊕ C)
(4)A ⊕ B = B ⊕ A
即异或运算满足结合律和交换律
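于是得到结论:
校验块的每个字节都是各数据块贡献(gf_mul(数据字节, 系数))的异或和。在旧校验块上对旧数据调用一次ec_encode_data_update,相当于把该数据块的旧贡献再异或一次,从而将其抵消(性质1、2);
再对新数据调用一次ec_encode_data_update,把新贡献异或进去,即可得到新的校验数据,整个过程不需要读取其他未修改的数据块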
使用isa-l/erasure_code/erasure_code_update_test.c验证这个结论
验证的过程如下:
(1)调用ec_encode_data得到旧的校验值,
(2)在旧校验块上对旧数据调用一次ec_encode_data_update(抵消旧数据的贡献),然后修改数据值,再对新数据调用一次ec_encode_data_update(加入新数据的贡献)
(3)对修改后的所有数据重新进行一次ec_encode_data
(4)对比两次得到的校验块看结果是否相同
测试通过
test ec_encode_data_update: 127x8192
update_buffs0: 8 7e b5 a7 4b 2a 75 ba f1 58 d0 5e 1d 1f f9 45 bf ba a3 6d 4c 86 22 e1 59
buffs0: 8 7e b5 a7 4b 2a 75 ba f1 58 d0 5e 1d 1f f9 45 bf ba a3 6d 4c 86 22 e1 59
update_buffs1: 3d 62 17 fe 7d a0 53 9b 59 16 f2 42 60 1e 3d 91 c1 71 ad df ed 26 d2 2d 4e
buffs1: 3d 62 17 fe 7d a0 53 9b 59 16 f2 42 60 1e 3d 91 c1 71 ad df ed 26 d2 2d 4e
done EC tests: Pass
附录:验证代码,采用4+2纠删码(k=4个数据块,m-k=2个校验块)进行验证
/**********************************************************************
Copyright(c) 2011-2015 Intel Corporation All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name of Intel Corporation nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>		// for memset, memcmp
#include "erasure_code.h"
#include "types.h"
// Alignment granularity; only consumed by the *_ALIGN_CHK_B macros below.
#ifndef ALIGN_SIZE
# define ALIGN_SIZE 16
#endif
// By default, test the multibinary version.
// FUNCTION_UNDER_TEST is the incremental encoder being exercised and
// REF_FUNCTION is the full encoder its result is compared against; both can
// be overridden together by defining FUNCTION_UNDER_TEST at build time.
#ifndef FUNCTION_UNDER_TEST
# define FUNCTION_UNDER_TEST ec_encode_data_update
# define REF_FUNCTION ec_encode_data
#endif
// Size in bytes of every data/parity buffer used by the test.
#define TEST_LEN 8192
// Half of TEST_LEN; not referenced by the code visible in this file.
#define TEST_SIZE (TEST_LEN/2)
// Upper bound on the number of buffers (data + parity); sizes the pointer arrays.
#ifndef TEST_SOURCES
# define TEST_SOURCES 127
#endif
// Not referenced by the code visible in this file.
#ifndef RANDOMS
# define RANDOMS 200
#endif
// Maximum m (total blocks) and k (data blocks); also size the matrix allocations.
#define MMAX TEST_SOURCES
#define KMAX TEST_SOURCES
#ifdef EC_ALIGNED_ADDR
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B 0
# define LEN_ALIGN_CHK_B 0 // 0 for aligned only
#else
// Define power of 2 range to check ptr, len alignment
# define PTR_ALIGN_CHK_B ALIGN_SIZE
# define LEN_ALIGN_CHK_B ALIGN_SIZE // 0 for aligned only
#endif
// Seed handed to srand() so that every run uses the same pseudo-random data.
#ifndef TEST_SEED
#define TEST_SEED 11
#endif
// Two-level stringification: xstr(X) expands the macro X first, then quotes it.
#define str(s) #s
#define xstr(s) str(s)
// Shorthand for a raw byte.
typedef unsigned char u8;
/*
 * Print the first `len` bytes of buf to stdout as hex, each formatted with
 * " %2x". A newline is emitted after every 32nd byte and one more newline
 * is always emitted at the end.
 */
void dump(unsigned char *buf, int len)
{
	int pos;

	for (pos = 0; pos < len; pos++) {
		printf(" %2x", 0xff & buf[pos]);
		/* Wrap once a full group of 32 bytes has been printed. */
		if ((pos + 1) % 32 == 0)
			printf("\n");
	}

	printf("\n");
}
/*
 * Print a k-row by m-column byte matrix stored as an array of row pointers.
 * Each element is printed with " %2x"; every row ends with a newline and a
 * blank line follows the last row.
 */
void dump_matrix(unsigned char **s, int k, int m)
{
	int row = 0;

	while (row < k) {
		int col = 0;

		while (col < m) {
			printf(" %2x", s[row][col]);
			col++;
		}
		printf("\n");
		row++;
	}

	printf("\n");
}
/*
 * Print a k-row by m-column byte matrix stored contiguously in row-major
 * order (element (i, j) lives at s[i * m + j]). Each element is printed with
 * " %2x"; every row ends with a newline and a blank line follows the last
 * row.
 */
void dump_u8xu8(unsigned char *s, int k, int m)
{
	int row, col;

	for (row = 0; row < k; row++) {
		/* Index of the first element of this row in the flat array. */
		int row_start = row * m;

		for (col = 0; col < m; col++)
			printf(" %2x", 0xff & s[col + row_start]);

		printf("\n");
	}

	printf("\n");
}
/*
 * Pick a random set of erased blocks.
 *
 * Walks block indices 0..m-1, flipping a coin (lowest bit of rand()) for
 * each, and stops early once m - k blocks have been marked. If the walk
 * marks nothing, a single index below m is drawn at random so that at least
 * one error is always reported.
 *
 * src_err_list - out: indices of the erased blocks, in increasing order
 * src_in_err   - out: per-index flag; written for every index visited by the
 *                walk (entries past an early stop are left untouched)
 * pnerrs       - out: number of entries stored in src_err_list
 * pnsrcerrs    - out: how many of those indices are below k
 * k, m         - number of data blocks and total number of blocks
 */
static void gen_err_list(unsigned char *src_err_list,
			 unsigned char *src_in_err, int *pnerrs, int *pnsrcerrs, int k, int m)
{
	int idx = 0;
	int total = 0;
	int below_k = 0;

	while (idx < m && total < m - k) {
		int flag = 1 & rand();

		src_in_err[idx] = flag;
		if (flag) {
			src_err_list[total++] = idx;
			if (idx < k)
				below_k++;
		}
		idx++;
	}

	if (total == 0) {
		/* Nothing was marked: redraw until the index is a valid block. */
		int pick;

		do {
			pick = rand() % KMAX;
		} while (pick >= m);

		src_err_list[total++] = pick;
		src_in_err[pick] = 1;
		if (pick < k)
			below_k = 1;
	}

	*pnerrs = total;
	*pnsrcerrs = below_k;
}
// Returned by gf_gen_decode_matrix() when it gives up looking for an
// invertible k x k selection of encode-matrix rows.
#define NO_INVERT_MATRIX -2
/*
 * Generate decode matrix from encode matrix.
 *
 * encode_matrix - in: encode matrix, indexed as encode_matrix[k * row + col]
 * decode_matrix - out: one k-byte row per entry of src_err_list
 * invert_matrix - out: result of gf_invert_matrix() on the selected rows
 * decode_index  - out: for each of the k selected rows, its row index in
 *                 encode_matrix
 * src_err_list  - in: erased block indices; entries [0, nsrcerrs) are used
 *                 as rows of invert_matrix, entries [nsrcerrs, nerrs) as
 *                 rows of encode_matrix
 * src_in_err    - in: per-block flag, nonzero means the block is erased
 * nerrs         - in: number of entries in src_err_list
 * nsrcerrs      - in: how many of those entries are handled as data blocks
 * k, m          - in: number of data blocks and total number of blocks
 *
 * Returns 0 on success, -1 if a scratch allocation fails, or
 * NO_INVERT_MATRIX if no invertible selection was found.
 */
static int gf_gen_decode_matrix(unsigned char *encode_matrix,
unsigned char *decode_matrix,
unsigned char *invert_matrix,
unsigned int *decode_index,
unsigned char *src_err_list,
unsigned char *src_in_err,
int nerrs, int nsrcerrs, int k, int m)
{
int i, j, p;
int r;
unsigned char *backup, *b, s;
int incr = 0;
// Scratch copies of the selected rows: b is handed to gf_invert_matrix(),
// backup is used to restore b before each retry.
b = malloc(MMAX * KMAX);
backup = malloc(MMAX * KMAX);
if (b == NULL || backup == NULL) {
printf("Test failure! Error with malloc\n");
free(b);
free(backup);
return -1;
}
// Construct matrix b by removing error rows
// NOTE(review): the inner while has no upper bound on r; it relies on the
// caller flagging few enough rows that k unflagged ones exist - confirm.
for (i = 0, r = 0; i < k; i++, r++) {
while (src_in_err[r])
r++;
for (j = 0; j < k; j++) {
b[k * i + j] = encode_matrix[k * r + j];
backup[k * i + j] = encode_matrix[k * r + j];
}
decode_index[i] = r;
}
incr = 0;
// Retry loop: while the current selection cannot be inverted, replace its
// last row with a later row of encode_matrix and try again.
while (gf_invert_matrix(b, invert_matrix, k) < 0) {
// With the maximum number of erasures there is no alternative row to try.
if (nerrs == (m - k)) {
free(b);
free(backup);
printf("BAD MATRIX\n");
return NO_INVERT_MATRIX;
}
// NOTE(review): incr is never reset inside this loop, so it keeps growing
// across retries while also being added to decode_index[k - 1] each time.
incr++;
// Restores b from backup; the copy covers the whole MMAX * KMAX allocation
// although only the first k * k bytes were filled in above.
memcpy(b, backup, MMAX * KMAX);
for (i = nsrcerrs; i < nerrs - nsrcerrs; i++) {
if (src_err_list[i] == (decode_index[k - 1] + incr)) {
// skip the erased parity line
incr++;
continue;
}
}
// Ran past the last row of encode_matrix: nothing left to substitute.
if (decode_index[k - 1] + incr >= m) {
free(b);
free(backup);
printf("BAD MATRIX\n");
return NO_INVERT_MATRIX;
}
decode_index[k - 1] += incr;
for (j = 0; j < k; j++)
b[k * (k - 1) + j] = encode_matrix[k * decode_index[k - 1] + j];
};
// Rows for the first nsrcerrs erasures are copied straight out of the
// inverted matrix.
for (i = 0; i < nsrcerrs; i++) {
for (j = 0; j < k; j++) {
decode_matrix[k * i + j] = invert_matrix[k * src_err_list[i] + j];
}
}
/* src_err_list from encode_matrix * invert of b for parity decoding */
// Each remaining row is the product (XOR-sum of gf_mul terms) of the erased
// block's encode_matrix row with the inverted matrix.
for (p = nsrcerrs; p < nerrs; p++) {
for (i = 0; i < k; i++) {
s = 0;
for (j = 0; j < k; j++)
s ^= gf_mul(invert_matrix[j * k + i],
encode_matrix[k * src_err_list[p] + j]);
decode_matrix[k * p + i] = s;
}
}
free(b);
free(backup);
return 0;
}
/*
 * Check that parity can be refreshed incrementally after one data block
 * changes, using a 4+2 code (k = 4 data blocks, m - k = 2 parity blocks).
 *
 * Steps:
 *   1. Fill k data blocks with pseudo-random bytes and make a second copy
 *      (change_buffs) whose first byte of block 0 is guaranteed to differ.
 *   2. Fully encode both copies with REF_FUNCTION.
 *   3. Sanity check: parity built from zero by k calls to
 *      FUNCTION_UNDER_TEST must equal the full encode of the same data.
 *   4. On the original parity, apply FUNCTION_UNDER_TEST with the OLD
 *      block 0 (cancels its contribution), modify block 0, then apply
 *      FUNCTION_UNDER_TEST with the NEW block 0 (adds its contribution).
 *   5. The result must match the full re-encode from step 2.
 *
 * Returns 0 when every comparison matches and -1 otherwise. All buffers are
 * released on every path that allocated them.
 */
int main(int argc, char *argv[])
{
	int i, j, s, m, k;
	int ret = -1;		// only cleared once every check has passed
	void *buf;
	unsigned char *buffs[TEST_SOURCES] = { NULL };
	unsigned char *update_buffs[TEST_SOURCES] = { NULL };
	unsigned char *change_buffs[TEST_SOURCES] = { NULL };
	unsigned char **sets[3] = { buffs, update_buffs, change_buffs };
	unsigned char *encode_matrix = NULL, *g_tbls = NULL;

	(void)argc;
	(void)argv;

	printf("test " xstr(FUNCTION_UNDER_TEST) ": %dx%d \n", TEST_SOURCES, TEST_LEN);

	srand(TEST_SEED);

	// Pick the code geometry first so only the buffers actually used are allocated
	m = 6;
	k = 4;
	if (m > MMAX || k > KMAX)
		return -1;

	// Allocate m zero-filled buffers per set. The parity buffers of
	// update_buffs must start at zero because the update function
	// accumulates into them.
	for (s = 0; s < 3; s++) {
		for (i = 0; i < m; i++) {
			if (posix_memalign(&buf, 64, TEST_LEN)) {
				printf("alloc error: Fail\n");
				goto out;
			}
			memset(buf, 0, TEST_LEN);
			sets[s][i] = buf;
		}
	}

	encode_matrix = malloc(MMAX * KMAX);
	g_tbls = malloc(KMAX * TEST_SOURCES * 32);
	if (encode_matrix == NULL || g_tbls == NULL) {
		printf("Test failure! Error with malloc\n");
		goto out;
	}

	// Make random data, identical in all three sets
	for (i = 0; i < k; i++) {
		for (j = 0; j < TEST_LEN; j++) {
			buffs[i][j] = rand();
			update_buffs[i][j] = buffs[i][j];
			change_buffs[i][j] = buffs[i][j];
		}
	}

	// Modify one byte. A random byte matches the old one 1 time in 256,
	// which would make the whole test vacuous, so force a difference.
	change_buffs[0][0] = rand();
	if (change_buffs[0][0] == buffs[0][0])
		change_buffs[0][0] ^= 0xff;

	// Generate encode matrix encode_matrix
	// The matrix generated by gf_gen_rs_matrix
	// is not always invertable.
	gf_gen_rs_matrix(encode_matrix, m, k);

	// Generate g_tbls from the parity rows of encode_matrix
	ec_init_tables(k, m - k, &encode_matrix[k * k], g_tbls);

	// Reference parity: full encode of the modified data and of the original data
	REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, change_buffs, &change_buffs[k]);
	REF_FUNCTION(TEST_LEN, k, m - k, g_tbls, buffs, &buffs[k]);

	// Sanity check: accumulating every data block into zeroed parity must
	// reproduce the full encode of the same (original) data.
	for (i = 0; i < k; i++) {
		FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, i, g_tbls, update_buffs[i],
				    &update_buffs[k]);
	}
	for (i = 0; i < m - k; i++) {
		if (0 != memcmp(update_buffs[k + i], buffs[k + i], TEST_LEN)) {
			printf("Fail: parity %d built by updates differs from full encode\n", i);
			goto out;
		}
	}

	// Incremental refresh: cancel the old contribution of block 0 ...
	FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, 0, g_tbls, buffs[0], &buffs[k]);
	// ... apply the modification ...
	buffs[0][0] = change_buffs[0][0];
	// ... and add the new contribution of block 0.
	FUNCTION_UNDER_TEST(TEST_LEN, k, m - k, 0, g_tbls, buffs[0], &buffs[k]);

	// Compare against the full re-encode. The "update_buffs%d" label is kept
	// for output compatibility; the bytes shown under it are the re-encoded
	// reference parity (change_buffs), while "buffs%d" shows the
	// incrementally refreshed parity.
	for (i = 0; i < m - k; i++) {
		if (0 != memcmp(change_buffs[k + i], buffs[k + i], TEST_LEN)) {
			printf("update_buffs%d :", i);
			dump(change_buffs[k + i], 25);
			printf("buffs%d :", i);
			dump(buffs[k + i], 25);
			goto out;
		}
		printf("update_buffs%d:", i);
		dump(change_buffs[k + i], 25);
		printf("buffs%d:", i);
		dump(buffs[k + i], 25);
		printf("\n");
	}

	printf("done EC tests: Pass\n");
	ret = 0;

out:
	// free(NULL) is a no-op, so partially completed allocation is fine here
	for (s = 0; s < 3; s++) {
		for (i = 0; i < TEST_SOURCES; i++)
			free(sets[s][i]);
	}
	free(encode_matrix);
	free(g_tbls);
	return ret;
}