/* Singly linked list node that stores one file's full path. */
typedef struct ChainNode {
struct ChainNode *next; // next node in the list; 0 at the tail
char *data; // file path + file name (heap copy made by AddChainNode)
}ChainNode;
/* List of the files that share one content string. */
typedef struct Chain {
struct ChainNode *head; // first node; AddChainNode inserts new nodes here
int count; // number of nodes in the list
}Chain;
/* Size budget for one file's content; the buffers below add 2 spare bytes. */
#define MAX_CONTEXT_LEN 50
/* Hash table entry: maps one file-content string to the files containing it. */
typedef struct Hash {
char key[MAX_CONTEXT_LEN + 2]; // file content, NUL-terminated; the uthash string key
Chain chain; // files whose content equals key
UT_hash_handle hh; // uthash bookkeeping handle
}Hash;
/* Global table head shared by AddHashNode and Output; NULL means empty. */
Hash *strHash = NULL;
/*
 * Prepend a private copy of `data` to `chain`.
 *
 * data  : NUL-terminated string to store (file path + file name); it is
 *         copied, so the caller keeps ownership of its own buffer.
 * chain : list to extend; head and count are updated on success.
 *
 * If memory cannot be allocated the chain is left completely unchanged
 * (count is not incremented), so a caller can detect the failure by
 * inspecting the chain.
 */
void AddChainNode(char* data, Chain *chain)
{
    size_t size = strlen(data) + 1;
    ChainNode *node = malloc(sizeof *node);

    if (node == NULL) {
        return;
    }
    node->data = malloc(size);
    if (node->data == NULL) {
        free(node);
        return;
    }
    memcpy(node->data, data, size);

    /* Push at the front; this also covers the empty list (head is NULL). */
    node->next = chain->head;
    chain->head = node;
    chain->count++;
}
/*
 * Record that the file `data` has the content `key`.
 *
 * key  : NUL-terminated file content, used as the hash key.
 * data : NUL-terminated file path + file name.
 *
 * If an entry for `key` already exists the file is added to its chain;
 * otherwise a new entry is created and inserted into the global strHash.
 *
 * The request is dropped without touching the table when
 *   - key does not fit into the fixed-size Hash.key buffer, or
 *   - memory for the entry or its first chain node cannot be allocated.
 */
void AddHashNode(char *key, char* data)
{
    Hash *hashNode = NULL;

    HASH_FIND_STR(strHash, key, hashNode);
    if (hashNode != NULL) {
        AddChainNode(data, &hashNode->chain);
        return;
    }

    /* sizeof does not evaluate its operand, so hashNode being NULL is fine. */
    if (strlen(key) >= sizeof hashNode->key) {
        return; /* copying would overflow the key buffer */
    }

    hashNode = malloc(sizeof *hashNode);
    if (hashNode == NULL) {
        return;
    }
    strcpy(hashNode->key, key); /* length verified above */
    hashNode->chain.count = 0;
    hashNode->chain.head = NULL;
    AddChainNode(data, &hashNode->chain);
    if (hashNode->chain.head == NULL) {
        /* The file could not be stored; do not publish an empty entry. */
        free(hashNode);
        return;
    }
    HASH_ADD_STR(strHash, key, hashNode);
}
/**
* Return an array of arrays of size *returnSize.
* The sizes of the arrays are returned as *returnColumnSizes array.
* Note: Both returned array and *columnSizes array must be malloced, assume caller calls free().
*/
#define MAX_PATH_LENGTH 10000
#define MAX_CONTEXT_LEN 50
/*
 * Copy the directory part of one input line into pathBuffer and append '/'.
 *
 * path       : input line such as "root/a 1.txt(abcd) 2.txt(efgh)"; the
 *              directory is everything before the first space (or the whole
 *              string when there is no space).
 * pathBuffer : destination; must have room for the directory plus one byte.
 *              It is NOT NUL-terminated by this function.
 *
 * Returns the number of bytes written (directory length + 1). When the line
 * contains a space this is also the index in `path` of the first file name.
 */
int GetPath(char *path, char *pathBuffer)
{
    int pos = 0;

    /* Single pass: stop at the separator or at the terminator. */
    while (path[pos] != '\0' && path[pos] != ' ') {
        pathBuffer[pos] = path[pos];
        pos++;
    }
    pathBuffer[pos++] = '/';
    return pos;
}
/*
 * Parse the file list of one input line and register every file in the
 * hash table.
 *
 * pathLen    : value returned by GetPath for this line: the length of the
 *              "directory/" prefix already stored in pathBuffer, and the
 *              index in `path` where the first file name starts.
 * pathBuffer : scratch buffer holding "directory/"; each file name is
 *              appended behind the prefix and then discarded again.
 * path       : the full input line, e.g. "root/a 1.txt(abcd) 2.txt(efgh)".
 *
 * For each "name(content)" item, AddHashNode(content, "directory/name") is
 * called. Two kinds of malformed or oversized input are handled defensively:
 *   - content longer than the context buffer can hold is skipped, because
 *     truncating it could group files whose contents actually differ;
 *   - a missing ')' ends processing of the line instead of reading past
 *     the end of the string.
 */
void FileProc(int pathLen, char *pathBuffer, char *path)
{
    char context[MAX_CONTEXT_LEN + 2];
    int lineLen = (int)strlen(path);
    int count = pathLen;
    int i = pathLen;

    while (i < lineLen) {
        if (path[i] != '(') {
            /* Still inside the file name. */
            pathBuffer[count++] = path[i++];
            continue;
        }

        pathBuffer[count] = '\0'; /* pathBuffer now holds path + file name */

        int contextLen = 0;
        int tooLong = 0;
        i++; /* step over '(' */
        while (i < lineLen && path[i] != ')') {
            if (contextLen < (int)sizeof context - 1) {
                context[contextLen++] = path[i];
            } else {
                tooLong = 1;
            }
            i++;
        }
        if (i >= lineLen) {
            return; /* unterminated "(...": nothing reliable left to parse */
        }
        context[contextLen] = '\0'; /* file content */

        if (!tooLong) {
            AddHashNode(context, pathBuffer);
        }

        /* Skip ')' and the separating space; rewind to the bare directory. */
        i += 2;
        count = pathLen;
    }
}
/*
 * Release one hash entry: every chain node, each node's path string, and
 * finally the entry itself. The entry is not unlinked from the table here;
 * callers do that with HASH_DEL before calling this function.
 */
void DestroyHashNode(Hash *node)
{
    ChainNode *cursor = node->chain.head;

    while (cursor != NULL) {
        ChainNode *following = cursor->next; /* read before cursor is freed */
        free(cursor->data);
        free(cursor);
        cursor = following;
    }
    free(node);
}
char ***Output(int *retuenSize, int **returnColSize)
{
int count = 0;
Hash *current_user, *tmp;
HASH_ITER(hh, strHash, current_user, tmp) {
if (current_user->chain.count > 1) {
count++;
} else {
HASH_DEL(strHash, current_user); /* delete; users advances to next */
DestroyHashNode(current_user); /* optional- if you want to free */
}
}
*retuenSize = count;
char *** output = (char***)malloc(sizeof(char**) * count);
int *retCol = (int *)malloc(sizeof(int) * count);
*returnColSize = retCol;
count = 0;
ChainNode *head;
HASH_ITER(hh, strHash, current_user, tmp) {
if (current_user->chain.count > 1) {
output[count] = (char**)malloc(sizeof(char*) * (current_user->chain.count));
head = current_user->chain.head;
int fileNUm = 0;
while (head) {
output[count][fileNUm] = (char*)malloc(sizeof(char) * (strlen(head->data) + 1));
strcpy(output[count][fileNUm], head->data);
fileNUm++;
head = head->next;
}
retCol[count] = current_user->chain.count;
count++;
HASH_DEL(strHash, current_user);
DestroyHashNode(current_user);
}
}
strHash = NULL;
return output;
}
/*
 * Entry point: group files by identical content.
 *
 * paths             : input lines of the form "dir name(content) name(content) ...".
 * pathsSize         : number of lines in paths.
 * returnSize        : forwarded to Output; receives the number of groups.
 * returnColumnSizes : forwarded to Output; receives the per-group file counts.
 *
 * Returns whatever Output returns after all lines have been processed.
 */
char *** findDuplicate(char ** paths, int pathsSize, int* returnSize, int** returnColumnSizes){
    char pathBuffer[MAX_PATH_LENGTH];
    int line = 0;

    /* Walk every input line, e.g. "root/a 1.txt(abcd) 2.txt(efgh)". */
    while (line < pathsSize) {
        char *entry = paths[line++];
        /* The directory prefix occupies pathBuffer[0 .. dirLen - 1]. */
        int dirLen = GetPath(entry, pathBuffer);
        FileProc(dirLen, pathBuffer, entry);
    }

    /* Visit all hash entries and emit those holding more than one file. */
    return Output(returnSize, returnColumnSizes);
}
https://blog.csdn.net/qq_23091073/article/details/86485095
uthash使用
初始化
uthash需要用户定义自己的数据结构,一个包含UT_hash_handle hh的结构体
还需要定义键和值(可选),这里将id作为key, name作为value
/* Example record stored in the hash table. */
struct my_struct {
int id; /* key */
char name[10]; /* value: name string, at most 9 characters plus the terminator */
UT_hash_handle hh; /* makes this structure hashable */
};
typedef struct my_struct HashNode; /* one table entry */
typedef struct my_struct *HashHead; /* table head: pointer to an entry */
1
2
3
4
5
6
7
8
添加
向hashtable中添加数据
key是int,可以使用 HASH_ADD_INT
key是字符串,可以使用 HASH_ADD_STR
key是指针,可以使用 HASH_ADD_PTR
其它,可以使用 HASH_ADD,上述实际都是调用这个方法,不过简化了参数
/*
 * Insert `users` into the table at *head unless an entry with the same id
 * already exists, in which case the table is left unchanged.
 */
void hashTabel_add(HashHead *head, HashNode *users) {
    /* Nothing to do when the id is already present. */
    if (find_user(*head, users->id)) {
        return;
    }
    /*
     * `id` is the NAME of the key member. It looks odd, but as a macro
     * argument it is substituted textually: see the macro source, where
     * intfield ends up inside &((add)->fieldname).
     */
    HASH_ADD_INT(*head, id, users);
}
1
2
3
4
5
6
/* Convenience form of HASH_ADD for a key member of type int named `intfield`. */
#define HASH_ADD_INT(head,intfield,add) \
HASH_ADD(hh,head,intfield,sizeof(int),add)
/* General form: forwards the address of the key member inside `add`, together with the key length, to HASH_ADD_KEYPTR. */
#define HASH_ADD(hh,head,fieldname,keylen_in,add) \
HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add)
1
2
3
4
替换
与添加差不多,会在添加前,删除key相同的节点,再添加新的节点
如果key是int,可以使用 HASH_REPLACE_INT
/*
 * Replace the entry whose id equals newNode->id with newNode.
 *
 * head    : address of the table head; may be updated by uthash.
 * newNode : entry to put into the table.
 *
 * When no entry with that id exists the table is left untouched and newNode
 * is NOT inserted. When one exists it is unlinked and, unless it is newNode
 * itself, freed: table entries are heap-owned, matching delete_user.
 */
void replace_user(HashHead *head, HashNode *newNode) {
    HashNode *oldNode = find_user(*head, newNode->id);

    if (!oldNode) {
        return;
    }
    /* The macro stores the displaced entry back into oldNode. */
    HASH_REPLACE_INT(*head, id, newNode, oldNode);
    if (oldNode != newNode) {
        free(oldNode); /* previously leaked */
    }
}
1
2
3
4
5
查找
根据key查找节点
如果key是int,可以使用 HASH_FIND_INT
/*
 * Look up the entry whose id equals user_id.
 * Returns the entry, or NULL when the table has no such id.
 */
HashNode *find_user(HashHead head, int user_id) {
    HashNode *match;

    /* The key is passed by address; the result lands in `match`. */
    HASH_FIND_INT(head, &user_id, match);
    return match;
}
1
2
3
4
5
删除
删除节点
使用 HASH_DEL
/*
 * Unlink `user` from the table at *head and free it.
 * A NULL user is ignored.
 */
void delete_user(HashHead *head,HashNode *user) {
    if (!user) {
        return;
    }
    HASH_DEL(*head, user); /* unlink the entry from the table */
    free(user);            /* freeing is this example's choice; uthash itself does not free */
}
1
2
3
4
5
6
计数
统计节点数
使用 HASH_COUNT
/* Returns the number of entries in the table, as reported by HASH_COUNT. */
int count_user(HashHead head) {
return HASH_COUNT(head);
}
1
2
3
遍历
遍历节点
可以用循环或者使用 HASH_ITER
/*
 * Print the table size followed by one line per entry, walking the
 * entries through the hh.next links.
 */
void print_user(HashHead head) {
    HashNode *cur = head;

    printf("size is %d\n", count_user(head));
    while (cur) {
        printf("user id %d, name %s\n", cur->id, cur->name);
        cur = cur->hh.next;
    }
}
/*
 * Print the table size followed by one line per entry, this time using
 * the HASH_ITER macro.
 */
void print_user_iterator(HashHead head) {
    HashNode *entry;
    HashNode *lookahead; /* scratch pointer required by HASH_ITER */

    printf("size is %d\n", count_user(head));
    HASH_ITER(hh, head, entry, lookahead) {
        printf("user id %d: name %s\n", entry->id, entry->name);
        /* ... deleting and freeing `entry` at this point would be safe */
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
排序
给节点排序,可以根据key或者value
使用 HASH_SORT
/* HASH_SORT comparator: orders entries by name using strcmp. */
int name_sort(HashNode *a, HashNode *b) {
    const char *left = a->name;
    const char *right = b->name;

    return strcmp(left, right);
}
/*
 * HASH_SORT comparator: orders entries by ascending id.
 * Returns -1, 0 or 1. The ids are compared rather than subtracted, because
 * a->id - b->id overflows (undefined behavior) for ids far apart, e.g.
 * INT_MIN and 1.
 */
int id_sort(HashNode *a, HashNode *b) {
    return (a->id > b->id) - (a->id < b->id);
}
/* Sorts the table at *head in place with HASH_SORT, using name_sort as the comparator. */
void sort_by_name(HashHead *head) {
HASH_SORT(*head, name_sort);
}
/* Sorts the table at *head in place with HASH_SORT, using id_sort as the comparator. */
void sort_by_id(HashHead *head) {
HASH_SORT(*head, id_sort);
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
三、完整代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "uthash.h"
/* Example record stored in the hash table. */
typedef struct my_struct {
int id; /* we'll use this field as the key */
char name[10]; /* value: name string, at most 9 characters plus the terminator */
UT_hash_handle hh; /* makes this structure hashable */
}HashNode;
/* Table head: a pointer to an entry; main starts with it set to NULL. */
typedef HashNode* HashHead;
/* Forward declaration: the print functions below call count_user before its definition. */
int count_user(HashHead head);
/*
 * Look up the entry whose id equals user_id.
 * Returns the entry, or NULL when the table has no such id.
 */
HashNode *find_user(HashHead head, int user_id) {
    HashNode *match;

    /* The key is passed by address; the result lands in `match`. */
    HASH_FIND_INT(head, &user_id, match);
    return match;
}
/*
 * Insert `users` into the table at *head unless an entry with the same id
 * already exists, in which case the table is left unchanged.
 */
void add_user(HashHead *head, HashNode *users) {
    if (find_user(*head, users->id)) {
        return; /* id already present */
    }
    /* `id` names the key member of HashNode; it is not a variable. */
    HASH_ADD_INT(*head, id, users);
}
/*
 * Replace the entry whose id equals newNode->id with newNode.
 *
 * head    : address of the table head; may be updated by uthash.
 * newNode : entry to put into the table.
 *
 * When no entry with that id exists the table is left untouched and newNode
 * is NOT inserted. When one exists it is unlinked and, unless it is newNode
 * itself, freed: table entries are heap-owned, matching delete_user.
 */
void replace_user(HashHead *head, HashNode *newNode) {
    HashNode *oldNode = find_user(*head, newNode->id);

    if (!oldNode) {
        return;
    }
    /* The macro stores the displaced entry back into oldNode. */
    HASH_REPLACE_INT(*head, id, newNode, oldNode);
    if (oldNode != newNode) {
        free(oldNode); /* previously leaked */
    }
}
/*
 * Unlink `user` from the table at *head and free it.
 * A NULL user is ignored.
 */
void delete_user(HashHead *head,HashNode *user) {
    if (!user) {
        return;
    }
    HASH_DEL(*head, user); /* unlink the entry from the table */
    free(user);            /* freeing is this example's choice; uthash itself does not free */
}
/*
 * Print the table size followed by one line per entry, walking the
 * entries through the hh.next links.
 */
void print_user(HashHead head) {
    HashNode *cur = head;

    printf("size is %d\n", count_user(head));
    while (cur) {
        printf("user id %d, name %s\n", cur->id, cur->name);
        cur = cur->hh.next;
    }
}
/*
 * Print the table size followed by one line per entry, this time using
 * the HASH_ITER macro.
 */
void print_user_iterator(HashHead head) {
    HashNode *entry;
    HashNode *lookahead; /* scratch pointer required by HASH_ITER */

    printf("size is %d\n", count_user(head));
    HASH_ITER(hh, head, entry, lookahead) {
        printf("user id %d: name %s\n", entry->id, entry->name);
        /* ... deleting and freeing `entry` at this point would be safe */
    }
}
/* Returns the number of entries in the table, as reported by HASH_COUNT. */
int count_user(HashHead head) {
return HASH_COUNT(head);
}
/* HASH_SORT comparator: orders entries by name using strcmp. */
int name_sort(HashNode *a, HashNode *b) {
    const char *left = a->name;
    const char *right = b->name;

    return strcmp(left, right);
}
/*
 * HASH_SORT comparator: orders entries by ascending id.
 * Returns -1, 0 or 1. The ids are compared rather than subtracted, because
 * a->id - b->id overflows (undefined behavior) for ids far apart, e.g.
 * INT_MIN and 1.
 */
int id_sort(HashNode *a, HashNode *b) {
    return (a->id > b->id) - (a->id < b->id);
}
/* Sorts the table at *head in place with HASH_SORT, using name_sort as the comparator. */
void sort_by_name(HashHead *head) {
HASH_SORT(*head, name_sort);
}
/* Sorts the table at *head in place with HASH_SORT, using id_sort as the comparator. */
void sort_by_id(HashHead *head) {
HASH_SORT(*head, id_sort);
}
int main()
{
printf("--------------init---------------\n");
HashHead head = NULL;
printf("--------------add---------------\n");
HashNode *node = malloc(sizeof(HashNode));
node->id = 1;
strcpy(node->name, "tom");
add_user(&head, node);
node = malloc(sizeof(HashNode));
node->id = 2;
strcpy(node->name, "jerry");
add_user(&head, node);
node = malloc(sizeof(HashNode));
node->id = 3;
strcpy(node->name, "jack");
add_user(&head, node);
node = malloc(sizeof(HashNode));
node->id = 0;
strcpy(node->name, "zero");
add_user(&head, node);
print_user(head);
printf("--------------replace---------------\n");
HashNode *newNode = malloc(sizeof(HashNode));
newNode->id = 3;
strcpy(newNode->name, "rose");
replace_user(&head, newNode);
print_user(head);
printf("--------------delete---------------\n");
delete_user(&head, find_user(head, 1));
print_user(head);
printf("--------------sort-by-id---------------\n");
sort_by_id(&head);
print_user(head);
printf("--------------sort-by-name---------------\n");
sort_by_name(&head);
print_user(head);
return 0;
}
————————————————
版权声明:本文为CSDN博主「aabond」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/qq_23091073/article/details/86485095