所有 DNA 都由一系列缩写为 ‘A’,‘C’,‘G’ 和 ‘T’ 的核苷酸组成,例如:“ACGAATTCCG”。在研究 DNA 时,识别 DNA 中的重复序列有时会对研究非常有帮助。
编写一个函数来找出所有目标子串,目标子串的长度为 10,且在 DNA 字符串 s 中出现次数超过一次。
示例 1:
输入:s = “AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT”
输出:[“AAAAACCCCC”,“CCCCCAAAAA”]
示例 2:
输入:s = “AAAAAAAAAAAAA”
输出:[“AAAAAAAAAA”]
提示:
0 <= s.length <= 10^5
s[i] 为 ‘A’、‘C’、‘G’ 或 ‘T’
hash+滑动窗口
方法一:指针
/* Length, in characters, of every substring window that is examined. */
#define NUMSIZE 10
/*
 * Hash-table entry for the pointer-key variant: the key characters live in a
 * separate buffer and the node only stores a pointer to them.
 */
typedef struct {
/* Points at the NUL-terminated substring used as the hash key. */
char *key;
/* How many times this substring has been seen so far. */
int count;
/* uthash bookkeeping; the HASH_* macros refer to this field by the name hh. */
UT_hash_handle hh;
} DNAhash;
/**
 * Find every substring of length NUMSIZE that occurs more than once in s
 * (pointer-key hash table variant).
 *
 * @param s          NUL-terminated input string; NULL is treated as empty.
 * @param returnSize Out-parameter: receives the number of strings returned.
 * @return A heap-allocated array of *returnSize heap-allocated, NUL-terminated
 *         strings, listed in order of first appearance in s. The caller frees
 *         each string and then the array. NULL is returned (with
 *         *returnSize == 0) when s is shorter than NUMSIZE or when an
 *         allocation fails.
 */
char ** findRepeatedDnaSequences(char * s, int* returnSize) {
    *returnSize = 0;
    if (s == NULL) {
        return NULL;
    }
    const size_t width = NUMSIZE;
    const size_t len = strlen(s);
    if (len < width) {
        return NULL;
    }

    DNAhash *table = NULL;
    size_t repeated = 0;   /* number of distinct substrings seen at least twice */
    int failed = 0;

    /* Probe with a stack buffer so nothing is allocated for windows that are
     * already in the table. */
    char window[NUMSIZE + 1];
    window[NUMSIZE] = '\0';

    for (size_t i = 0; i + width <= len; i++) {
        memcpy(window, s + i, width);

        DNAhash *found = NULL;
        HASH_FIND_STR(table, window, found);
        if (found != NULL) {
            /* Count each distinct substring once, on its second sighting, so
             * the result array can be sized exactly. */
            if (++found->count == 2) {
                repeated++;
            }
            continue;
        }

        DNAhash *node = malloc(sizeof *node);
        char *key = malloc(width + 1);
        if (node == NULL || key == NULL) {
            free(node);
            free(key);
            failed = 1;
            break;
        }
        memcpy(key, window, width + 1);
        node->key = key;
        node->count = 1;
        HASH_ADD_KEYPTR(hh, table, node->key, width, node);
    }

    char **res = NULL;
    if (!failed) {
        /* Always request at least one slot: malloc(0) is implementation-defined. */
        res = malloc(sizeof *res * (repeated > 0 ? repeated : 1));
    }

    /* Tear the table down. Key buffers of repeated substrings are handed to
     * the result array; every other key is released here. */
    DNAhash *cur = NULL;
    DNAhash *next = NULL;
    HASH_ITER(hh, table, cur, next) {
        HASH_DEL(table, cur);
        if (res != NULL && cur->count > 1) {
            res[(*returnSize)++] = cur->key;
        } else {
            free(cur->key);
        }
        free(cur);
    }
    return res;
}
方法二:数组
/* Length, in characters, of every substring window that is examined. */
#define NUMSIZE 10
/*
 * Hash-table entry for the array-key variant: the key characters are stored
 * inside the node itself.
 */
typedef struct {
/* The substring used as the hash key, plus its terminating NUL. */
char key[NUMSIZE + 1];
/* How many times this substring has been seen so far. */
int value;
/* uthash bookkeeping; the HASH_* macros refer to this field by the name hh. */
UT_hash_handle hh;
} hash_node;
char ** findRepeatedDnaSequences(char * s, int* returnSize)
{
hash_node *g_node = NULL;
int returnStrSize = 0;
int len = strlen(s);
*returnSize = 0;
if (len < NUMSIZE) {
return NULL;
}
for (int i = 0; i <= len - NUMSIZE; i++) {
char *temp = (char *)malloc(sizeof(char) * (NUMSIZE + 1));
memcpy(temp, &s[i], sizeof(char) * NUMSIZE);
temp[NUMSIZE] = '\0';
hash_node *findNode = (hash_node *)malloc(sizeof(hash_node));
HASH_FIND_STR(g_node, temp, findNode);
if (findNode == NULL) {
hash_node *cur = (hash_node *)malloc(sizeof(hash_node));
memcpy(cur->key, temp, sizeof(char) * (NUMSIZE + 1));
cur->value = 1;
HASH_ADD_STR(g_node, key, cur);
} else {
(findNode->value)++;
returnStrSize++;
}
}
char **res = (char **)malloc(sizeof(char *) * returnStrSize);
hash_node *cur = NULL;
hash_node *tmp = NULL;
HASH_ITER(hh, g_node, cur, tmp) {
if (cur->value > 1) {
res[*returnSize] = (char *)malloc(sizeof(char) * (NUMSIZE + 1));
memcpy(res[*returnSize], cur->key, sizeof(char) * (NUMSIZE + 1));
(*returnSize)++;
}
HASH_DEL(g_node, cur);
free(cur);
}
return res;
}