leetcode187. 重复DNA序列

leetcode187. 重复DNA序列

  • 1. 题目
  • 2. 解答

1. 题目

所有 DNA 都由一系列缩写为 ‘A’,‘C’,‘G’ 和 ‘T’ 的核苷酸组成,例如:“ACGAATTCCG”。在研究 DNA 时,识别 DNA 中的重复序列有时会对研究非常有帮助。
编写一个函数来找出所有目标子串,目标子串的长度为 10,且在 DNA 字符串 s 中出现次数超过一次。

示例 1:
输入:s = “AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT”
输出:[“AAAAACCCCC”,“CCCCCAAAAA”]

示例 2:
输入:s = “AAAAAAAAAAAAA”
输出:[“AAAAAAAAAA”]

提示:
0 <= s.length <= 10^5
s[i] 为 ‘A’、‘C’、‘G’ 或 ‘T’

2. 解答

hash+滑动窗口
方法一:指针

/* Length of the DNA substrings that are counted. */
#define NUMSIZE 10

/* uthash entry that maps one NUMSIZE-character substring to its occurrence count. */
typedef struct {
    char *key;          /* heap-allocated, NUL-terminated substring used as the hash key */
    int count;          /* number of windows seen so far that equal key */
    UT_hash_handle hh;  /* uthash handle; lets this struct be stored in a uthash table */
} DNAhash;

/**
 * Collect every NUMSIZE-character substring of s that occurs more than once.
 *
 * Slides a NUMSIZE-wide window over s and counts each distinct window in a
 * uthash table keyed by a heap-allocated copy of the window.
 *
 * @param s          NUL-terminated input string; NULL is treated as empty.
 * @param returnSize Out parameter; receives the number of strings returned.
 * @return A malloc'd array of *returnSize malloc'd, NUL-terminated strings
 *         (the caller frees each string and then the array), or NULL when
 *         there is no repeated substring or an allocation fails
 *         (*returnSize is 0 in both cases).
 */
char ** findRepeatedDnaSequences(char * s, int* returnSize) {
    *returnSize = 0;
    if (s == NULL) {
        return NULL;
    }
    size_t len = strlen(s);
    if (len < NUMSIZE) {
        return NULL;
    }

    DNAhash *table = NULL;
    int repeated = 0;   /* distinct substrings seen at least twice */
    int failed = 0;     /* set when an allocation fails */

    for (size_t i = 0; i + NUMSIZE <= len; i++) {
        DNAhash *hit = NULL;
        /* Keys are matched by length and bytes, so the window can be looked
         * up in place; no temporary copy is needed just to search. */
        HASH_FIND(hh, table, &s[i], NUMSIZE, hit);
        if (hit != NULL) {
            if (hit->count == 1) {
                repeated++;     /* count each repeated substring only once */
            }
            hit->count++;
            continue;
        }

        /* First occurrence: the table entry owns its own copy of the window. */
        DNAhash *node = malloc(sizeof *node);
        char *key = malloc(NUMSIZE + 1);
        if (node == NULL || key == NULL) {
            free(node);
            free(key);
            failed = 1;
            break;
        }
        memcpy(key, &s[i], NUMSIZE);
        key[NUMSIZE] = '\0';
        node->key = key;
        node->count = 1;
        HASH_ADD_KEYPTR(hh, table, node->key, NUMSIZE, node);
    }

    char **res = NULL;
    if (!failed && repeated > 0) {
        res = malloc(sizeof *res * (size_t)repeated);
    }

    /* Tear the table down. Keys of repeated substrings are handed to the
     * caller through res; every other key is released here. */
    DNAhash *cur = NULL;
    DNAhash *next = NULL;
    HASH_ITER(hh, table, cur, next) {
        HASH_DEL(table, cur);
        if (res != NULL && cur->count > 1) {
            res[*returnSize] = cur->key;
            (*returnSize)++;
        } else {
            free(cur->key);
        }
        free(cur);
    }

    return res;
}

方法二:数组

/* Length of the DNA substrings that are counted. */
#define NUMSIZE 10

/* uthash entry that stores one NUMSIZE-character substring inline with its count. */
typedef struct {
    char key[NUMSIZE + 1];  /* NUL-terminated substring used as the hash key */
    int value;              /* number of windows seen so far that equal key */
    UT_hash_handle hh;      /* uthash handle; lets this struct be stored in a uthash table */
} hash_node;

char ** findRepeatedDnaSequences(char * s, int* returnSize)
{
    hash_node *g_node = NULL;
    int returnStrSize = 0;
    int len = strlen(s);
    *returnSize = 0;
    if (len < NUMSIZE) {
        return NULL;
    }

    for (int i = 0; i <= len - NUMSIZE; i++) {
        char *temp = (char *)malloc(sizeof(char) * (NUMSIZE + 1));
        memcpy(temp, &s[i], sizeof(char) * NUMSIZE);
        temp[NUMSIZE] = '\0';
        hash_node *findNode = (hash_node *)malloc(sizeof(hash_node));
        HASH_FIND_STR(g_node, temp, findNode);
        if (findNode == NULL) {
            hash_node *cur = (hash_node *)malloc(sizeof(hash_node));
            memcpy(cur->key, temp, sizeof(char) * (NUMSIZE + 1));
            cur->value = 1;
            HASH_ADD_STR(g_node, key, cur);
        } else {
            (findNode->value)++;
            returnStrSize++;
        }
    }

    char **res = (char **)malloc(sizeof(char *) * returnStrSize);
    hash_node *cur = NULL;
    hash_node *tmp = NULL;
    HASH_ITER(hh, g_node, cur, tmp) {
        if (cur->value > 1) {
            res[*returnSize] = (char *)malloc(sizeof(char) * (NUMSIZE + 1));
            memcpy(res[*returnSize], cur->key, sizeof(char) * (NUMSIZE + 1));
            (*returnSize)++;
        }
        HASH_DEL(g_node, cur);
        free(cur);
    }
    return res;
}

你可能感兴趣的:(c语言,leetcode)