将大数据载入内存中检索字符串

<pre name="code" class="cpp"><pre name="code" class="cpp">#define _CRT_SECURE_NO_WARNINGS

#include <limits.h>
#include<stdio.h>
#include<stdlib.h>
#include <string.h>

char **pp = NULL;  // Line table: pp[i] points to the text stored for slot i

/*
 * Load the text file at `path` into memory and publish it through `pp`.
 *
 * A table of 13180807 pointers is allocated. Each successful fgets() read
 * (at most 274 characters, trailing newline kept) is copied into its own
 * heap buffer and stored in the next slot. Every slot that receives no data
 * (file missing, file shorter than the table, or allocation failure) points
 * at a shared empty string, so the whole table can always be scanned safely.
 *
 * Prints "Fail!" when the table cannot be allocated, the file cannot be
 * opened, or a line buffer cannot be allocated; prints the success message
 * only after the file has been read and closed.
 *
 * path: name of the file to load.
 */
void initDataToMemory(char *path)
{
	enum { LINE_COUNT = 13180807, LINE_BUF_SIZE = 275 };
	static char emptyLine[] = "";  // shared placeholder for slots without data

	pp = (char **)malloc(sizeof(char *) * LINE_COUNT);
	if (pp == NULL)
	{
		printf("Fail!\n");
		return;
	}

	// Give every slot a valid string up front; real lines overwrite these.
	for (int i = 0; i < LINE_COUNT; i++)
	{
		pp[i] = emptyLine;
	}

	FILE *pf = fopen(path, "r");
	if (pf == NULL)
	{
		printf("Fail!\n");
		return;
	}

	char str[LINE_BUF_SIZE];  // read buffer for one fgets() chunk
	for (int i = 0; i < LINE_COUNT && fgets(str, sizeof str, pf) != NULL; i++)
	{
		size_t size = strlen(str) + 1;  // text plus terminating '\0'
		char *px = (char *)malloc(size);
		if (px == NULL)
		{
			// Out of memory: keep what was loaded so far, report the failure.
			printf("Fail!\n");
			fclose(pf);
			return;
		}
		memcpy(px, str, size);
		pp[i] = px;  // store the copy's address in the line table
	}

	fclose(pf);
	printf("载入内存成功!\n");
}

/*
 * Count the lines of the text file at `path`.
 *
 * The file is read with fgets() into a 275-byte buffer and every successful
 * read is counted, so a line longer than 274 characters is counted once per
 * chunk read.
 *
 * path: name of the file to examine.
 * Returns the number of lines read (0 for an empty file), or -1 if the file
 * cannot be opened.
 */
int getLine(char *path)
{
	FILE *pf = fopen(path, "r");
	if (pf == NULL)
	{
		return -1;  // could not open the file
	}

	int count = 0;
	char str[275];
	// Loop on the fgets() result rather than feof(): feof() only turns true
	// after a read has already failed, which would count one phantom line.
	while (fgets(str, sizeof str, pf) != NULL)
	{
		count++;
	}
	fclose(pf);

	return count;
}
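// 13180807 lines in total: 13180807 * 4 bytes / 1024 / 1024 is about 50 MB for the pointer table (with 4-byte pointers)
// Allocate one pointer per line; each pointer refers to that line's text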

/*
 * Report the size in bytes of the file at `path`.
 *
 * The file is opened in binary mode because the position reported by
 * ftell() on a text stream is not guaranteed to be a byte count.
 *
 * path: name of the file to measure.
 * Returns the size in bytes, or -1 if the file cannot be opened, the
 * position cannot be determined, or the size does not fit in an int.
 */
int getFileSize(char *path)
{
	FILE *pf = fopen(path, "rb");
	if (pf == NULL)
	{
		return -1;  // could not open the file
	}

	long size = -1L;
	if (fseek(pf, 0L, SEEK_END) == 0)  // move to the end of the file
	{
		size = ftell(pf);  // offset of the end from the start; -1L on failure
	}
	fclose(pf);

	if (size < 0L || size > INT_MAX)
	{
		return -1;  // seek/tell failed, or the size is not representable
	}
	return (int)size;
}

/*
 * Search all 13180807 slots of the global line table `pp` for `searchStr`
 * and print every stored line that contains it, each preceded by a newline.
 *
 * searchStr: substring to look for.
 * Returns the first stored line containing `searchStr`, or NULL when nothing
 * matches, the table has not been allocated, or `searchStr` is NULL. The
 * returned pointer refers to the table's own storage; do not free it.
 */
char *findStr(char *searchStr)
{
	char *firstMatch = NULL;

	if (pp == NULL || searchStr == NULL)
	{
		return NULL;  // nothing to search
	}

	for (int i = 0; i < 13180807; i++)
	{
		// Skip empty slots; strstr() must not be given a NULL pointer.
		if (pp[i] != NULL && strstr(pp[i], searchStr) != NULL)
		{
			printf("\n%s", pp[i]);  // print the matching line
			if (firstMatch == NULL)
			{
				firstMatch = pp[i];
			}
		}
	}

	return firstMatch;
}

/*
 * Entry point: load the data file into memory, then repeatedly read a
 * whitespace-delimited search term from standard input and print every
 * loaded line that contains it. The loop ends when input ends or a read
 * fails.
 */
int main(void)
{
	char path[] = "E:\\dangdangwang.txt";

	// getFileSize(path) and getLine(path) can be called here to report the
	// file's size in bytes and its line count before loading.
	initDataToMemory(path);  // load the file's lines into memory

	char searchStr[100] = { 0 };
	// %99s keeps the read inside the 100-byte buffer; checking the result
	// stops the loop at end of input instead of spinning forever.
	while (scanf("%99s", searchStr) == 1)
	{
		findStr(searchStr);  // search the in-memory lines
	}

	system("pause");
	return 0;
}


 
 
 
 
 

你可能感兴趣的:(C语言,内存检索,大数据检索基础)