/*
 * One record of the garbage-collection table: the fingerprints collected
 * for a single container.
 */
struct GCHashEntry{
/* Container ID; the table is keyed by the address of this field. */
uint64_t cid;
/* Queue of heap-allocated fingerprint copies gathered for this container. */
Queue* chunk_queue;
};
/*
 * Value destructor for the GC hash table.
 * Releases the fingerprint queue of the entry (each queued fingerprint is
 * released with free()) and then the entry itself.
 */
void destructor(gpointer ptr){
    struct GCHashEntry *entry = ptr;

    queue_free(entry->chunk_queue, free);
    free(entry);
}
/*
 * Thread body: drain delete_recipe_queue and group the fingerprints of the
 * popped chunks by container ID inside global_gc_HashTable.
 *
 * - A CHUNK_FILE_START marker is discarded.
 * - A CHUNK_FILE_END marker sets endFlag and ends the loop.
 * - Any other chunk contributes a heap copy of its fingerprint to the queue
 *   of the GCHashEntry keyed by the chunk's container ID; the entry is
 *   created on first use.
 *
 * 'arg' is unused. Always returns NULL.
 */
static void* gether_fingerprint_for_deletion(void *arg) {
    struct chunk *ck;

    global_gc_HashTable = g_hash_table_new_full(g_int64_hash, g_int64_equal, NULL, destructor);

    while ((ck = sync_queue_pop(delete_recipe_queue))) {
        if (CHECK_CHUNK(ck, CHUNK_FILE_START)) {
            free_chunk(ck);
            continue;
        }

        if (CHECK_CHUNK(ck, CHUNK_FILE_END)) {
            free_chunk(ck);
            endFlag = true;
            break;
        }

        /* Find the per-container bucket, creating it on first sight. */
        struct GCHashEntry *bucket = g_hash_table_lookup(global_gc_HashTable, &ck->id);
        if (bucket == NULL) {
            bucket = (struct GCHashEntry*) malloc(sizeof(struct GCHashEntry));
            bucket->cid = ck->id;
            bucket->chunk_queue = queue_new();
            /* The key points into the entry, so it lives as long as the value. */
            g_hash_table_insert(global_gc_HashTable, &bucket->cid, bucket);
        }

        /* The chunk is freed below, so the queue keeps its own copy. */
        fingerprint *copy = (fingerprint*) malloc(sizeof(fingerprint));
        memcpy(copy, &ck->fp, sizeof(fingerprint));
        queue_push(bucket->chunk_queue, copy);

        free_chunk(ck);
    }

    return NULL;
}
/*
 * Metadata of one chunk stored in a container.
 */
struct metaEntry {
/* Offset of the chunk's bytes inside the container's data buffer. */
int32_t off;
/* Length of the chunk in bytes. */
int32_t len;
/* Fingerprint of the chunk; its address is used as the key in the container's map. */
fingerprint fp;
};
void chunk_filter(void* item, void* user_data){
fingerprint * fp = (fingerprint*)item;
GHashTable * gHashTable = (GHashTable*)user_data;
if(g_hash_table_contains(gHashTable, item)){
g_hash_table_remove(gHashTable, item);
}
}
/*
 * Metadata section of a container.
 */
struct containerMeta {
/* ID of the container. */
containerid id;
/* Presumably the number of data bytes currently stored; checked against the data capacity on overflow tests. */
int32_t data_size;
/* Number of chunks recorded; expected to equal the number of entries in 'map'. */
int32_t chunk_num;
/* Map fingerprints to chunk offsets. */
GHashTable *map;
};
/*
 * An in-memory container: its metadata plus the raw byte buffer.
 */
struct container {
/* Metadata (ID, counters and the fingerprint map). */
struct containerMeta meta;
/* Raw container bytes; set to 0 by retrieve_container_by_id() when only metadata is loaded. */
unsigned char *data;
};
/*
 * Allocate a chunk of 'size' bytes.
 *
 * The chunk starts flagged CHUNK_UNIQUE, with TEMPORARY_ID as container ID
 * and an all-zero fingerprint. A data buffer is allocated only when
 * size > 0; otherwise 'data' is NULL.
 *
 * Returns the new chunk.
 */
struct chunk* new_chunk(int32_t size) {
    struct chunk *fresh = (struct chunk*) malloc(sizeof(struct chunk));

    fresh->flag = CHUNK_UNIQUE;
    fresh->id = TEMPORARY_ID;
    memset(&fresh->fp, 0x0, sizeof(fingerprint));
    fresh->size = size;
    fresh->data = (size > 0) ? malloc(size) : NULL;

    return fresh;
}
/*
 * g_hash_table_foreach() callback: copy one chunk out of a container and
 * queue it for migration.
 *
 * key       - fingerprint of the chunk.
 * value     - struct metaEntry giving the chunk's offset and length.
 * user_data - the struct container holding the chunk's bytes.
 *
 * Updates migrate_counter and migrate_size, then pushes the new chunk onto
 * migrate_data_queue.
 */
void chunk_migrate(gpointer key, gpointer value, gpointer user_data){
    struct metaEntry *entry = (struct metaEntry*)value;
    struct container *source = (struct container*)user_data;

    struct chunk *copy = new_chunk(entry->len);
    memcpy(&copy->fp, key, sizeof(fingerprint));
    copy->size = entry->len;
    memcpy(copy->data, source->data + entry->off, entry->len);

    migrate_counter++;
    migrate_size += copy->size;

    sync_queue_push(migrate_data_queue, copy);
}
函数 void read_container_filter(gpointer key, gpointer value, gpointer user_data)
这个函数的作用:先取出value指向的entry(GCHashEntry),再根据entry中的cid读出对应的container,然后利用函数container_meta_foreach,对container的meta.map中的每一个指纹调用delete_an_entry,在htable中删除该指纹所对应的container id(遍历的是container的meta.map中的指纹,删除的是htable中这些指纹对应的container id)。
然后利用函数queue_foreach,从container的meta.map中删除entry的指纹队列(chunk_queue)里出现的指纹。(删除的是container中的指纹,但只有同时出现在指纹队列里的指纹才会被删除)
疑问:为什么删除container id时遍历的是container的meta.map中的全部指纹,而删除指纹时针对的却只是entry的指纹队列中的指纹?
然后利用函数g_hash_table_foreach,为container的meta.map中剩下的每一个指纹各构造一个新的chunk(由chunk_migrate完成),并将这些chunk放入migrate_data_queue中,最后用free_container释放这个container。
所以,这个函数的主要目的就是:找到value对应的container,先在htable中删除该container中各指纹对应的container id,再从container中去掉value的指纹队列里包含的指纹,然后将container中剩下的指纹对应的chunk放入migrate_data_queue队列中,最后释放这个container。
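先看函数retrieve_container_by_id。这个函数的作用是根据container的id找到对应的container。方法大致是:根据id计算文件偏移,加锁后用fseek/fread把container(或者只把它的元数据区)读入内存,再反序列化出id、chunk_num、data_size,以及每个指纹对应的metaEntry,并把metaEntry插入meta.map。细节比较复杂,留待之后再细看。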
/*
 * Read the container 'id' from the container file 'fp' and rebuild its
 * in-memory metadata.
 *
 * When destor.simulation_level >= SIMULATION_RESTORE only the metadata
 * region (CONTAINER_META_SIZE bytes) is read, and the returned container
 * has data == 0. Otherwise the whole container (CONTAINER_SIZE bytes) is
 * read and kept in c->data, with the metadata taken from its last
 * CONTAINER_META_SIZE bytes.
 *
 * Returns a newly allocated container whose meta.map holds one metaEntry
 * per chunk, keyed by fingerprint.
 *
 * NOTE(review): the results of malloc() and fread() are not checked here.
 */
struct container* retrieve_container_by_id(containerid id) {
struct container *c = (struct container*) malloc(sizeof(struct container));
init_container_meta(&c->meta);
/* Start of the serialized metadata inside c->data. */
unsigned char *cur = 0;
if (destor.simulation_level >= SIMULATION_RESTORE) {
/* Metadata-only read. */
c->data = malloc(CONTAINER_META_SIZE);
/* The seek and the read are done under the same lock. */
pthread_mutex_lock(&mutex);
/* The +8 presumably skips a header at the start of the file - TODO confirm. */
if (destor.simulation_level >= SIMULATION_APPEND)
fseek(fp, id * CONTAINER_META_SIZE + 8, SEEK_SET);
else
fseek(fp, (id + 1) * CONTAINER_SIZE - CONTAINER_META_SIZE + 8,
SEEK_SET);
fread(c->data, CONTAINER_META_SIZE, 1, fp);
pthread_mutex_unlock(&mutex);
cur = c->data;
} else {
/* Full read: data followed by the trailing metadata region. */
c->data = malloc(CONTAINER_SIZE);
pthread_mutex_lock(&mutex);
fseek(fp, id * CONTAINER_SIZE + 8, SEEK_SET);
fread(c->data, CONTAINER_SIZE, 1, fp);
pthread_mutex_unlock(&mutex);
cur = &c->data[CONTAINER_SIZE - CONTAINER_META_SIZE];
}
/* Deserialize the fixed header: id, chunk_num, data_size (in that order). */
unser_declare;
unser_begin(cur, CONTAINER_META_SIZE);
unser_int64(c->meta.id);
unser_int32(c->meta.chunk_num);
unser_int32(c->meta.data_size);
/* The stored ID must match the requested one. */
if(c->meta.id != id){
WARNING("expect %lld, but read %lld", id, c->meta.id);
assert(c->meta.id == id);
}
int i;
/* One record per chunk: fingerprint, then length, then offset. */
for (i = 0; i < c->meta.chunk_num; i++) {
struct metaEntry* me = (struct metaEntry*) malloc(
sizeof(struct metaEntry));
unser_bytes(&me->fp, sizeof(fingerprint));
unser_bytes(&me->len, sizeof(int32_t));
unser_bytes(&me->off, sizeof(int32_t));
/* The key points into the entry itself. */
g_hash_table_insert(c->meta.map, &me->fp, me);
}
unser_end(cur, CONTAINER_META_SIZE);
/* In the metadata-only case the temporary buffer is no longer needed. */
if (destor.simulation_level >= SIMULATION_RESTORE) {
free(c->data);
c->data = 0;
}
return c;
}
函数 void container_meta_foreach(struct containerMeta* cm, void (*func)(fingerprint*, void*), void* data)
遍历container中的指纹,在每个指纹上应用函数func。
/*
 * Visit every fingerprint recorded in the container metadata 'cm' and call
 * 'func' on it, forwarding 'data' unchanged as the second argument.
 */
void container_meta_foreach(struct containerMeta* cm, void (*func)(fingerprint*, void*), void* data){
    GHashTableIter cursor;
    gpointer fp_key;
    gpointer entry;

    /* Only the keys (fingerprints) are handed to the callback. */
    for (g_hash_table_iter_init(&cursor, cm->map);
            g_hash_table_iter_next(&cursor, &fp_key, &entry); ) {
        func(fp_key, data);
    }
}
/*
 * Call 'func' on the payload of every element of 'queue', from the first
 * element onwards, forwarding 'user_data' as the second argument.
 * An empty queue (elem_num == 0) is left untouched.
 */
void queue_foreach(Queue *queue, void (*func)(void *data, void *user_data),
        void *user_data) {
    if (queue->elem_num == 0)
        return;

    for (queue_ele_t *node = queue->first; node; node = node->next)
        func(node->data, user_data);
}
/*
 * g_hash_table_foreach() callback applied to every GCHashEntry.
 *
 * Loads the container named by the entry, calls delete_an_entry for every
 * fingerprint of that container, removes the entry's queued fingerprints
 * from the container's map, hands the remaining chunks to chunk_migrate,
 * and finally frees the loaded container.
 *
 * key and user_data are unused; value is the struct GCHashEntry.
 */
void read_container_filter(gpointer key, gpointer value, gpointer user_data){
    struct GCHashEntry *victim = (struct GCHashEntry*)value;
    struct container *loaded = retrieve_container_by_id(victim->cid);

    printf("container %lu, total chunk %d, drop chunk %d\n", victim->cid, loaded->meta.chunk_num, victim->chunk_queue->elem_num);
    con_counter++;

    /* 1. Visit every fingerprint of the container with delete_an_entry. */
    container_meta_foreach(&loaded->meta, delete_an_entry, &victim->cid);

    /* 2. Drop the fingerprints collected for deletion from the map. */
    queue_foreach(victim->chunk_queue, chunk_filter, loaded->meta.map);

    /* 3. Whatever is still in the map is migrated. */
    g_hash_table_foreach(loaded->meta.map, chunk_migrate, loaded);

    free_container(loaded);
}
/*
 * One record of the garbage-collection table: the fingerprints collected
 * for a single container.
 * NOTE(review): this struct is declared more than once in this file; a real
 * translation unit must keep a single definition.
 */
struct GCHashEntry{
/* Container ID; the table is keyed by the address of this field. */
uint64_t cid;
/* Queue of heap-allocated fingerprint copies gathered for this container. */
Queue* chunk_queue;
};
函数load_container_for_deletion首先创建一个起始的chunk(CHUNK_FILE_START)放入migrate_data_queue中,然后遍历global_gc_HashTable,对每一个GCHashEntry都调用函数read_container_filter。
相当于对每一个GCHashEntry,处理它所对应的container:去掉GCHashEntry中记录的指纹,然后将container中剩下的指纹对应的chunk放入migrate_data_queue队列中。
最后再创建一个结束的chunk(CHUNK_FILE_END)放入migrate_data_queue中。
/*
 * Thread body: feed migrate_data_queue with the chunks that survive the
 * deletion.
 *
 * Pushes a CHUNK_FILE_START marker, resets the statistics counters, runs
 * read_container_filter over every entry of global_gc_HashTable, prints
 * the statistics, and pushes a CHUNK_FILE_END marker.
 *
 * 'arg' is unused. Always returns NULL.
 */
static void* load_container_for_deletion(void *arg) {
    struct chunk *marker;

    /* Opening marker. */
    marker = new_chunk(0);
    SET_CHUNK(marker, CHUNK_FILE_START);
    sync_queue_push(migrate_data_queue, marker);

    con_counter = 0;
    migrate_counter = 0;
    migrate_size = 0;

    g_hash_table_foreach(global_gc_HashTable, read_container_filter, NULL);

    printf("%lu containers involved, %lu chunks (%lu bytes) migrated\n", con_counter, migrate_counter, migrate_size);

    /* Closing marker. */
    marker = new_chunk(0);
    SET_CHUNK(marker, CHUNK_FILE_END);
    sync_queue_push(migrate_data_queue, marker);

    return NULL;
}
/*
 * Register a set of features under a container/segment ID.
 * With physical locality this is called once per written container;
 * with logical locality, once per written segment.
 *
 * features - table whose keys are the features to register.
 * id       - the container/segment ID stored for every feature.
 *
 * Each key costs one kvstore_update() call and bumps
 * index_overhead.update_requests.
 */
void index_update(GHashTable *features, int64_t id){
    GHashTableIter cursor;
    gpointer feature;
    gpointer ignored;

    VERBOSE("Filter phase: update %d features", g_hash_table_size(features));

    for (g_hash_table_iter_init(&cursor, features);
            g_hash_table_iter_next(&cursor, &feature, &ignored); ) {
        index_overhead.update_requests++;
        kvstore_update(feature, id);
    }
}
/*
 * Tell whether adding a chunk of 'size' bytes would overflow container 'c'.
 *
 * Returns 1 when either the data region or the metadata region would be
 * exceeded, 0 otherwise.
 */
int container_overflow(struct container* c, int32_t size) {
    /*
     * First test: data region capacity.
     * Second test: metadata region capacity. 28 is the size of metaEntry;
     * 16 presumably covers the fixed metadata header - TODO confirm.
     */
    return c->meta.data_size + size > CONTAINER_SIZE - CONTAINER_META_SIZE
        || (c->meta.chunk_num + 1) * 28 + 16 > CONTAINER_META_SIZE;
}
/*
 * Queue container 'c' on container_buffer for writing.
 *
 * An empty container is not queued: container_count is decremented and a
 * message is logged instead. The chunk counter must agree with the size of
 * the fingerprint map (asserted).
 */
void write_container_async(struct container* c) {
    assert(c->meta.chunk_num == g_hash_table_size(c->meta.map));

    if (!container_empty(c)) {
        sync_queue_push(container_buffer, c);
        return;
    }

    /* An empty container
     * It possibly occurs in the end of backup */
    container_count--;
    VERBOSE("Append phase: Deny writing an empty container %lld",
            c->meta.id);
}
/*
 * Queue a filled container for writing and register the fingerprints of
 * its chunks in the index under that container's ID.
 *
 * con       - the container to write.
 * chunks    - the chunks that were added to 'con'; freed before returning.
 * chunk_num - number of elements in 'chunks'.
 */
static void flush_migrated_container(struct container *con, GSequence *chunks,
        int32_t chunk_num) {
    /*
     * write_container_async() pushes the container onto another queue, so
     * read the ID before handing it over.
     */
    containerid written_id = con->meta.id;

    write_container_async(con);

    /* sampling() returns the feature table; it is destroyed below. */
    GHashTable *features = sampling(chunks, chunk_num);

    /* Add every fingerprint of the container as a feature. */
    for (GSequenceIter *it = g_sequence_get_begin_iter(chunks);
            !g_sequence_iter_is_end(it); it = g_sequence_iter_next(it)) {
        struct chunk *ck = g_sequence_get(it);
        fingerprint *ft = malloc(sizeof(fingerprint));
        memcpy(ft, &ck->fp, sizeof(fingerprint));
        g_hash_table_insert(features, ft, NULL);
    }

    /* index_update() expects the container ID, not the chunk count. */
    index_update(features, written_id);

    g_sequence_free(chunks);
    g_hash_table_destroy(features);
}

/*
 * Thread body: pack the chunks popped from migrate_data_queue into new
 * containers.
 *
 * - CHUNK_FILE_START creates the first container and an empty chunk list.
 * - Ordinary chunks are added to the current container; when the container
 *   would overflow it is flushed first and a new one is started.
 * - CHUNK_FILE_END flushes the last container (if it holds any chunk),
 *   syncs the container store, sets endFlag and ends the loop.
 *
 * 'arg' is unused. Always returns NULL.
 */
static void* write_container_for_deletion(void *arg) {
    struct chunk *c;
    struct container *con = NULL;
    GSequence *gseq = NULL;
    int32_t seq_count = 0;

    while ((c = sync_queue_pop(migrate_data_queue))) {
        if (CHECK_CHUNK(c, CHUNK_FILE_START)) {
            con = create_container();
            gseq = g_sequence_new(free_chunk);
            seq_count = 0;
            free_chunk(c);
            continue;
        }

        if (CHECK_CHUNK(c, CHUNK_FILE_END)) {
            free_chunk(c);
            if (seq_count > 0) {
                flush_migrated_container(con, gseq, seq_count);
                container_store_sync();
            } else if (gseq != NULL) {
                /* Nothing was migrated: release the empty chunk list. */
                /* NOTE(review): the unused container 'con' is not released here - confirm how it should be disposed of. */
                g_sequence_free(gseq);
            }
            endFlag = true;
            break;
        }

        if (container_overflow(con, c->size)) {
            flush_migrated_container(con, gseq, seq_count);
            seq_count = 0;
            gseq = g_sequence_new(free_chunk);
            con = create_container();
        }

        add_chunk_to_container(con, c);
        g_sequence_append(gseq, c);
        seq_count++;
    }

    return NULL;
}
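endFlag用来表示当前阶段已经完成:第一阶段由gether_fingerprint_for_deletion函数在读到结束chunk时置为true;do_delete将其复位为false之后,第二阶段由write_container_for_deletion函数在完成时置为true。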
/*
 * Delete the backup version 'jobid'.
 * We assume a FIFO order of deleting backup, namely the oldest backup is deleted first.
 *
 * Steps:
 *  1. Truncate the manifest to obtain the set of invalid containers.
 *  2. Phase one: read the recipe and gather the fingerprints to delete,
 *     grouped per container.
 *  3. Phase two: load the affected containers and rewrite their surviving
 *     chunks into new containers.
 *  4. Remove the stale index entries and mark the backup version deleted.
 *  5. Append a line to delete.log and record the invalid container IDs in
 *     recipes/delete_<jobid>.id under the working directory.
 *
 * NOTE(review): phase completion is detected by polling endFlag and the
 * worker threads are never joined - confirm this is intended.
 */
void do_delete(int jobid) {
    invalid_containers = trunc_manifest(jobid);

    init_index();
    init_recipe_store();
    init_container_store();

    struct backupVersion* backupVersion = open_backup_version(jobid);

    /* Phase one: recipe reader -> fingerprint gatherer. */
    delete_recipe_queue = sync_queue_new(100);
    pthread_t read_t, build_t, load_t, write_t;
    endFlag = false;
    pthread_create(&read_t, NULL, read_recipe_for_deletion, backupVersion);
    pthread_create(&build_t, NULL, gether_fingerprint_for_deletion, NULL);
    do{
        usleep(100);
    }while(!endFlag);

    /* Phase two: container loader -> container writer. */
    endFlag = false;
    migrate_data_queue = sync_queue_new(100);
    pthread_create(&load_t, NULL, load_container_for_deletion, NULL);
    pthread_create(&write_t, NULL, write_container_for_deletion, NULL);
    do{
        usleep(100);
    }while(!endFlag);

    /* Delete the invalid entries in the key-value store */
    if(destor.index_category[1] == INDEX_CATEGORY_PHYSICAL_LOCALITY){
        struct backupVersion* bv = open_backup_version(jobid);

        /* The entries pointing to Invalid Containers are invalid. */
        GHashTableIter iter;
        gpointer key, value;
        g_hash_table_iter_init(&iter, invalid_containers);
        while(g_hash_table_iter_next(&iter, &key, &value)){
            containerid id = *(containerid*)key;
            /* Cast so the argument always matches %lld. */
            NOTICE("Reclaim container %lld", (long long)id);
            struct containerMeta* cm = retrieve_container_meta_by_id(id);

            container_meta_foreach(cm, delete_an_entry, &id);

            free_container_meta(cm);
        }

        bv->deleted = 1;
        update_backup_version(bv);
        free_backup_version(bv);

    }else if(destor.index_category[1] == INDEX_CATEGORY_LOGICAL_LOCALITY){
        /* Ideally, the entries pointing to segments in backup versions of a 'bv_num' less than 'jobid' are invalid. */
        /* (For simplicity) Since a FIFO order is given, we only need to remove the IDs exactly matched 'bv_num'. */
        struct backupVersion* bv = open_backup_version(jobid);

        struct segmentRecipe* sr;
        while((sr=read_next_segment(bv))){
            segment_recipe_foreach(sr, delete_an_entry, &sr->id);
        }

        bv->deleted = 1;
        update_backup_version(bv);
        free_backup_version(bv);

    }else{
        WARNING("Invalid index type");
        exit(1);
    }

    close_container_store();
    close_recipe_store();
    close_index();

    char logfile[] = "delete.log";
    FILE *logfp = fopen(logfile, "a");
    if(logfp){
        /*
         * ID of the job we delete,
         * number of live containers,
         * memory footprint
         */
        fprintf(logfp, "%d %d %d\n",
                jobid,
                destor.live_container_num,
                destor.index_memory_footprint);
        fclose(logfp);
    }else{
        /* Logging is best effort: report and carry on. */
        WARNING("Can not open %s", logfile);
    }

    /* record the IDs of invalid containers */
    /* NOTE(review): didfilepath is never released in this function - confirm ownership. */
    sds didfilepath = sdsdup(destor.working_directory);
    char s[128];
    snprintf(s, sizeof s, "recipes/delete_%d.id", jobid);
    didfilepath = sdscat(didfilepath, s);

    FILE* didfile = fopen(didfilepath, "w");
    if(didfile){
        GHashTableIter iter;
        gpointer key, value;
        g_hash_table_iter_init(&iter, invalid_containers);
        while(g_hash_table_iter_next(&iter, &key, &value)){
            containerid id = *(containerid*)key;
            fprintf(didfile, "%lld\n", (long long)id);
        }
        fclose(didfile);
    }else{
        WARNING("Can not open %s", didfilepath);
    }

    g_hash_table_destroy(invalid_containers);
}