一、Trash translator for GlusterFS
在这里主要介绍一下trash translator的功能,Trash translator为从glusterfs卷里面删除的文件提供一个临时存放的地方,就是相当于为删除的文件提供一个回收站,可以帮助用户获取和恢复临时被删除的数据。每个块都会保留一个隐藏的目录.trash,它将会被用于存放被从各个块删除的文件。这个translator以后还会增强功能来支持被删除文件的恢复。
回收站的目录名应该是可配置的。trash translator也会被用于内部操作,比如卷的自修复以及再平衡。trash translator被设计用来拦截unlink(文件删除),truncate(按路径截断文件),ftruncate(按文件描述符截断文件),rmdir(目录删除)等操作,将目标文件在trash目录中做一份拷贝,然后在原文件上执行相应的操作。
在删除的操作过程中,trash translator将会拦截到unlink调用,然后检查要操作的文件是否和排除模式(eliminate pattern)相匹配。如果匹配,那么这个translator则继续向下执行unlink调用,直接删除文件,而不会把它保存到.trash目录下面。
然而,如果要操作的文件和排除模式不匹配,文件执行stat操作,成功之后,trash translator将会在.trash目录下面建立相同的路径作为文件的路径。
当trash translator拦截到一个truncate/ftruncate调用,一个新的文件将会在trash中建立,原来文件的内容也通过使用readv和writev函数调用拷贝到新的文件中。一旦所有内容被拷贝完,trash translator将会继续在原文件上执行truncate调用。
/* * Function:generate_allow_ip_table * Descriptions:generate allow ip table so that the client can do anything not merely read only * Parameter:xlator, trash_private_t, dict_t * Return:int32_t * */ int32_t generate_allow_ip_table (xlator_t *this, trash_private_t *priv, dict_t *options) { int ret = 0; dict_t *ip_table = NULL; dict_t *server_option = NULL; char *brick_name = NULL; char *addr_cpy = NULL; char *addr_str = NULL; char *searchstr = NULL; char *trashdir_read_only = NULL; char *tmp = NULL; data_t *allow_addr = NULL; xlator_list_t *parents = NULL; char *allow_name = NULL; char *password = NULL; parents = this->parents; /*get the root parents xlator:server*/ while (parents->xlator->parents) { parents = parents->xlator->parents; } if ( strcmp(parents->xlator->type,"protocol/server")!=0 ) return 0; /*get the server xlator info including options,allow name,password and so on*/ server_option = parents->xlator->options; brick_name = FIRST_CHILD(parents->xlator)->name; //brick_name = parents->xlator->children->xlator->name; ret = gf_asprintf(&searchstr,"auth.login.%s.allow",brick_name); if (-1 == ret){ gf_log ("features/trashdir-read-only",GF_LOG_WARNING, "asprintf failed while setting search string"); goto out; } allow_name = gf_strdup ( dict_get (server_option,searchstr)->data ); ret = gf_asprintf (&searchstr, "auth.login.%s.password",allow_name); if (-1 == ret){ gf_log ("feature/trashdir-read-only",GF_LOG_WARNING, "asprintf failed while setting search string"); goto out; } password = gf_strdup (dict_get(server_option,searchstr)->data); priv->allow_name = allow_name; priv->password = password; if (this->private) priv->ip_table = ( (trash_private_t *)(this->private) )->ip_table; this->private = priv; /*get the white list from the options*/ if (options){ ret = dict_get_str (options,"trashdir-read-only",&trashdir_read_only); if(!strcmp(trashdir_read_only,"on")){ allow_addr = dict_get(options,"trashdir-white-list"); } else return ret; } else { ret = dict_get_str 
(this->options,"trashdir-read-only",&trashdir_read_only); if(!strcmp(trashdir_read_only,"on")){ allow_addr = dict_get (this->options,"trashdir-white-list"); } else return ret; } /*extract ips from the white list in the form of ip1,ip2,ip3... using strtok_r*/ if(allow_addr!=NULL){ addr_cpy = gf_strdup(allow_addr->data); if( NULL == addr_cpy ) goto out; if( strcmp(addr_cpy,"*")==0 ){ if(ip_table) { dict_destroy(ip_table); ip_table = NULL; } goto replace; } addr_str = strtok_r (addr_cpy,",",&tmp); ip_table = dict_new(); while(addr_str){ gf_log (this->name, GF_LOG_DEBUG,"allow = \"%s\"",addr_str); ret = dict_set_int32 (ip_table, addr_str, 1); if (ret != 0){ dict_destroy(ip_table); goto out; } addr_str = strtok_r(NULL,",",&tmp); } } replace: if(priv->ip_table){ dict_destroy(priv->ip_table); } priv->ip_table = ip_table; this->private = priv; out: GF_FREE(addr_cpy); return ret; }这个函数的调用位置有两个,一个是在translator初始化的函数init,以及options重置的函数reconfigure:
/*
 * init (excerpt; elided parts are shown as "...").
 * Reads the "trashdir-read-only" option into
 * priv->trashdir_read_only_enabled, then calls generate_allow_ip_table
 * with a NULL options dict and stores priv in this->private.
 * NOTE(review): the parameter type is written as `xlator` here while the
 * other functions in this article take `xlator_t *` - confirm against the
 * real source.
 */
int32_t init (xlator *this)
{
        ...
        GF_OPTION_INIT ("trashdir-read-only", priv->trashdir_read_only_enabled, bool, out);
        /* NULL options: the white list is read from this->options */
        ret = generate_allow_ip_table(this,priv,NULL);
        this->private = (void *)priv;
out:
        ...
}
/*
 * reconfigure (excerpt; elided parts are shown as "....").
 * Re-reads the "trashdir-read-only" option from the new options dict,
 * stores it in priv->trashdir_read_only_enabled, and calls
 * generate_allow_ip_table with that dict.
 * NOTE(review): the parameter type is written as `xlator` here while the
 * other functions in this article take `xlator_t *` - confirm against the
 * real source.
 */
int reconfigure (xlator *this, dict_t *options)
{
        ....
        GF_OPTION_RECONF ("trashdir-read-only",trashdir_read_only_enabled, options, bool, out);
        priv->trashdir_read_only_enabled = trashdir_read_only_enabled;
        /* the white list is taken from the freshly supplied options */
        ret = generate_allow_ip_table(this,priv,options);
out:
        ....
}
步骤一:首先通过遍历parents指针得到指向server translator的指针,并取得它的选项字典server_option。通过glusterfs的架构我们知道server translator是server端的第一层次,接受从client端发送过来的rpc请求,然后依次传递到posix层次。得到server xlator的指针后,获取块名称brick_name,允许客户端请求通过的名字allow_name,以及password等信息。
4.2 主要函数二:trash.c:is_client_read_only_on_trashdir( xlator_t *this, call_frame_t *frame )。
/* * Function:is_client_read_only_on_trashdir * Description:check whether the client has the permission to do other ops,except read * Parameters:xlator_t, call_frame_t * return:gf_boolean_t * */ gf_boolean_t is_client_read_only_on_trashdir (xlator_t *this, call_frame_t *frame) { char *ip = NULL; int val = 0; trash_private_t *priv = NULL; gf_boolean_t trashdir_read_only = _gf_true; char *name = NULL; char *password = NULL; /*crucial judgement which means option trashdir-read-only on or off*/ priv = this->private; trashdir_read_only = priv->trashdir_read_only_enabled; if ( !trashdir_read_only ){ return _gf_false; } if (frame == NULL){ return _gf_true; } if ( frame->root->client->volfile_id && strcmp(frame->root->client->volfile_id,"gluster/nfs")==0 ){ return _gf_true; } /*get white list from ip_table*/ if (priv->ip_table!=NULL){ ip = frame->root->client->identifier; dict_get_int32(priv->ip_table,ip,&val); if(val==1){ return _gf_false; } } if (trashdir_read_only){ name = frame->root->client->auth.username; password = frame->root->client->auth.passwd; if (!name||!password){ return _gf_true; } /*self access is allowed obviously*/ #if 1 if( (!strcmp(name,priv->allow_name)) && (!strcmp(password,priv->password)) ) trashdir_read_only = _gf_false; #endif } return trashdir_read_only; }这个函数判断一个客户端是否对trashdir是只读的(实际上是判断客户端对卷是否是只读的,对trashdir只读还要涉及到后面的路径问题),下面来分析一下这个函数的过程:
步骤一:首先通过私有变量priv = this->private; priv->trashdir_read_only_enabled来判断option:trashdir-read-only是开启还是关闭(on/off),this->private是当前translator定义的私有变量,在translator init以及reconfigure的过程中,相应的option的值会有相应的变化,那么就可以将这种变化写入到this->private(在这里是trash_private_t,通常是一个写在头文件中的结构体),那么通过this变量的传递来获取相应的option的值。
步骤二:如果trashdir_read_only变量是off,那么返回false,说明不是只读的;否则,获取ip table。
4.3 主要函数之三:is_trashdir_prefix_of_loc( call_frame_t *frame, xlator_t *this, loc_t *loc ).
/*
 * Function: is_trashdir_prefix_of_loc
 * Description: check whether the file operated on by the client lies in
 *              the trashcan directory, i.e. whether loc->path is
 *              "/.trashcan" itself or starts with "/.trashcan/".
 * Parameters:
 *   frame - call frame of the request; a NULL frame is logged and
 *           treated as "inside the trashcan"
 *   this  - the trash translator (used for the log domain)
 *   loc   - location being operated on
 * Return: _gf_true if the path is inside the trashcan directory (or the
 *         frame is NULL), _gf_false otherwise.
 *
 * The comparison is done in place, so no memory is allocated.
 */
gf_boolean_t
is_trashdir_prefix_of_loc (call_frame_t *frame, xlator_t *this, loc_t *loc)
{
        static const char trashdir[]   = "/.trashcan";
        const size_t      trashdir_len = sizeof (trashdir) - 1;

        if (frame == NULL) {
                gf_log (this->name, GF_LOG_ERROR, "The frame is NULL");
                return _gf_true;
        }

        /*
         * NOTE(review): a loc without a path cannot be matched and is
         * reported as outside the trashcan - confirm this is the wanted
         * policy for path-less requests.
         */
        if (loc == NULL || loc->path == NULL) {
                return _gf_false;
        }

        /* judge and compare the prefix of loc to "/.trashcan" */
        if (strncmp (loc->path, trashdir, trashdir_len) != 0) {
                return _gf_false;
        }

        /* require a component boundary so "/.trashcanX" does not match */
        if (loc->path[trashdir_len] == '\0' ||
            loc->path[trashdir_len] == '/') {
                return _gf_true;
        }

        return _gf_false;
}
这个函数是判断被客户端操作的文件是不是在.trash目录里面。思想非常简单,就是将操作文件的路径loc的前缀和"/.trashcan"进行比较,如果匹配,那么说明在.trash目录里面,否则不在。当然,在此需要提醒的一个问题就是,如果获取所操作的文件的路径,有的fops里面有直接的参数是loc,但是有的只有文件描述符fd,因此问题就是如何通过fd得到loc,在这里提供一个函数将fd-->loc:
/* * Funcation:fd_to_loc * Descriptions:get loc from fd * Parameter:fd_t * Return:loc * */ loc_t fd_to_loc(fd_t *fd) { loc_t loc = {0,}; char *path = NULL; int ret = -1; loc.inode = inode_ref(fd->inode); uuid_copy(loc.gfid, fd->inode->gfid); ret = inode_path(fd->inode,NULL,&path);//get path from inode loc.path = path; return loc; <span style="font-family:Microsoft YaHei;font-size:14px;">}</span>观察一下源码里面的inode_ref,inode_path就可以得到相应的转化结果。
trash.h代码: (提取码:69d1)
trash.c代码: (提取码:0497)
5.1 测试方法
#!/bin/bash
# Test case: this test checks the trashdir-read-only option.
#
# Flow:
#   1. create and start a disperse volume and mount it over FUSE
#   2. with the option off (default), writes and deletes succeed
#   3. with trash and trashdir-read-only on, writes outside .trashcan
#      still succeed while writes inside .trashcan must fail
#   4. after white-listing this client, deletes inside .trashcan succeed

. $(dirname $0)/../include.rc
. $(dirname $0)/../volume.rc

cleanup;

# Address of the client that is allowed to modify .trashcan.
# NOTE(review): empty in the original; it has to be filled in before the
# white-list section can pass.
ALLOWIP=

# file_exists VOL PATTERN...
# Succeeds if a matching entry exists on brick 1 or brick 2 of VOL.
file_exists () {
        vol=$1
        shift
        for file in `ls $B0/${vol}1/$@ 2> /dev/null` ; do
                test -e ${file} && return 0
        done
        for file in `ls $B0/${vol}2/$@ 2> /dev/null` ; do
                test -e ${file} && return 0
        done
        return 1
}

#[1-2]
TEST glusterd
TEST pidof glusterd

#[3-4]
TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9}
TEST $CLI volume create $V0 disperse 10 redundancy 2 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8,9}

#[5-7]
EXPECT "$V0" volinfo_field $V0 'Volume Name'
EXPECT 'Created' volinfo_field $V0 'Status'
EXPECT '10' brick_count $V0

#[8-9]
TEST $CLI volume start $V0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'

# Mount FUSE and create file/dir, create should succeed as the
# trashdir-read-only is off by default
#[10-16]
TEST glusterfs -s $H0 --volfile-id $V0 $M0
TEST mkdir $M0/AA
TEST touch $M0/AA/aa
TEST dd if=/dev/zero of=$M0/bb bs=1024 count=1024
TEST file_exists $V0 AA/aa bb
TEST rm -rf $M0/bb
TEST ! file_exists $V0 bb

# turn on trashdir-read-only option through volume set [17-18]
TEST gluster volume set $V0 trash on
TEST gluster volume set $V0 trashdir-read-only on

# All write operations outside .trashcan directory should succeed now [19-24]
TEST mkdir $M0/BB
TEST touch $M0/BB/bb
TEST dd if=/dev/zero of=$M0/cc bs=1024 count=1024
TEST file_exists $V0 BB/bb cc
TEST rm -rf $M0/cc $M0/BB
TEST ! file_exists $V0 cc BB/bb

# All write operations inside .trashcan directory should fail now.
# The original wrote "TEST ! -e <cmd>", which negates a non-existent
# command named "-e" and therefore always passes; the command itself is
# negated here so a successful write is reported as a failure.
TEST ! mkdir $M0/.trashcan/DD
TEST ! touch $M0/.trashcan/DD/dd
TEST ! dd if=/dev/zero of=$M0/.trashcan/dd bs=1024 count=1024
# Globs are used because "rm -f" on a missing name exits 0 and the
# checks further down look the trashed copies up as cc* and bb*.
TEST ! rm -rf $M0/.trashcan/cc* #cc is rm at TEST XX
TEST ! rm -rf $M0/.trashcan/BB/bb*
TEST ls
# "ll" is an interactive alias and not available in scripts.
# NOTE(review): listing is a read, so it is asserted to succeed here;
# confirm this matches the intent of the original "! -e ll -a" line.
TEST ls -la $M0/.trashcan

# set trashdir-white-list through volume set option [21-23]
TEST gluster volume set $V0 features.trashdir-white-list $ALLOWIP
EXPECT "on" volinfo_field $V0 'features.trashdir-read-only'
EXPECT "$ALLOWIP" volinfo_field $V0 'features.trashdir-white-list'

# All write and remove operations should succeed now [24-26]
TEST ls -a $M0/.trashcan
TEST file_exists $V0 .trashcan/cc* .trashcan/BB/bb*
TEST rm -rf $M0/.trashcan/cc*
TEST ! file_exists $V0 .trashcan/cc*
TEST file_exists $V0 .trashcan/BB/bb*

## Finish up
TEST $CLI volume stop $V0
EXPECT 'Stopped' volinfo_field $V0 'Status'
TEST $CLI volume delete $V0
TEST ! $CLI volume info $V0

cleanup;
# NOTE(review): the original text ended with "<<mark mark", which looks
# like extraction residue and not part of the script - confirm and drop.