6、读取所有的result_i.txt文档,然后在这些排名前100的查询字符串中找到排名前10的,放在result.txt当中
// ConsoleApplication1.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"

// NOTE(review): the header names on the original #include lines were lost;
// the list below is reconstructed from the names this file uses - confirm.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <fstream>
#include <hash_map>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include <windows.h>

#include"Header.h"
using namespace std;
// Path of the input file that main() opens and processes chunk by chunk.
#define FILENAME "proc.txt"
// Number of bytes requested for each memory-mapped view of the input file.
const int read_byte_count = 512000000;
// 2^32; used to split a 64-bit file position into high and low 32-bit halves.
const _int64 max_ul_int = 4294967296;
// File position at which the chunk currently being processed starts.
_int64 file_start_pos;
// File position just past the chunk currently being processed; getFilePointer()
// sets a provisional value and rePosition() overwrites it.
_int64 file_end_pos;
/**
 * Equality predicate and hash functor for NUL-terminated C strings.
 *
 * The two-argument call compares the character sequences (not the pointer
 * values); the one-argument call returns a small additive hash.
 */
struct str_equal
{
    /// Returns true when `a` and `b` hold the same character sequence.
    bool operator()(char* a, char* b) const
    {
        return strcmp(a, b) == 0;
    }

    /// Returns the sum of the bytes of `a` modulo 100, always in [0, 99].
    int operator()(char* a) const
    {
        int sum = 0;
        // The original loop never advanced its index and therefore never
        // terminated on a non-empty string.
        for (int i = 0; a[i] != '\0'; i++)
        {
            // Go through unsigned char so bytes >= 0x80 cannot make the sum
            // (and with it the hash) negative where plain char is signed.
            sum += static_cast<unsigned char>(a[i]);
        }
        return sum % 100;
    }
};
/**
 * Maps a NUL-terminated string to a bucket number.
 *
 * @param str  NUL-terminated string to hash; must not be null.
 * @return     Sum of the string's bytes modulo 10, always in [0, 9].
 */
int hash_char(char*str)
{
    int sum = 0;
    for (int i = 0; str[i] != '\0'; i++)
    {
        // Read each byte as unsigned: where plain char is signed, bytes
        // >= 0x80 (multi-byte encoded text) would otherwise drive the sum
        // negative and produce a negative bucket number.
        sum += static_cast<unsigned char>(str[i]);
    }
    return sum % 10;
}
/**
 * Appends a string to the file whose name is the decimal form of `rand`.
 *
 * @param str   NUL-terminated text to append; written without its terminator.
 * @param rand  Number used, formatted with "%d", as the file name.
 */
void save2txt(char* str,int rand)
{
    // The file name is just the number itself, e.g. "7".
    char bucket_path[20] = { 0 };
    sprintf(bucket_path, "%d", rand);

    // Append mode, so earlier content of the file is kept.
    std::ofstream bucket_file(bucket_path, std::ios::app);
    const size_t text_length = strlen(str);
    bucket_file.write(str, text_length);
    bucket_file.close();
}
void proc_str(LPVOID & str)
{
//获得此次处理的文件长度
char* str_index = (char*)str;
_int64 str_length = file_end_pos - file_start_pos;
_int64 byte_count = 0;
_int64 index = 0;
_int64 end = 0;
while (byte_count != str_length)
{
char buf[256] = { 0 };
if (str_index[byte_count] == '\n')
{
end = byte_count;
strncpy(buf, str_index + index, end - index);
//对buf进行处理,将查询字符串和字符串出现的频度放在里面
int a = hash_char(buf);
save2txt(buf, a);
index = end;
}
byte_count++;
}
}
/**
 * Chooses where the current chunk ends and stores that in file_end_pos.
 *
 * On entry the caller must have set file_end_pos so that
 * file_end_pos - file_start_pos is the number of readable bytes at lpBase.
 * The scan stops after 2,000,000 newlines or at the end of those bytes,
 * whichever comes first:
 *  - at least one newline found: the chunk ends just past the last newline
 *    seen, so a line is never split across two chunks;
 *  - no newline at all: the chunk covers all readable bytes.
 *
 * The earlier version scanned without a bound and relied on catch (...) to
 * stop at the end of the view; C++ catch does not intercept access
 * violations unless the program is built with /EHa, so that was not a
 * dependable stop condition.
 *
 * @param lpBase  Address of the first byte of the chunk; when it is null,
 *                file_end_pos is set to file_start_pos (an empty chunk).
 */
void rePosition(LPVOID lpBase)
{
    const int max_lines = 2000000;
    const char* bytes = static_cast<const char*>(lpBase);
    const _int64 readable = file_end_pos - file_start_pos;
    if (bytes == NULL || readable <= 0)
    {
        file_end_pos = file_start_pos;
        return;
    }

    int line_count = 0;
    _int64 last_line_end = 0; // offset just past the most recent '\n'
    for (_int64 pos = 0; pos < readable && line_count < max_lines; pos++)
    {
        if (bytes[pos] == '\n')
        {
            line_count++;
            last_line_end = pos + 1;
        }
    }

    const _int64 consumed = (last_line_end > 0) ? last_line_end : readable;
    file_end_pos = file_start_pos + consumed;
}
/**
 * Maps the next chunk of the input file, processes it, and advances
 * file_start_pos to the end of what was processed.
 *
 * The chunk starts at file_start_pos and is at most read_byte_count bytes
 * long. rePosition() trims it and proc_str() distributes its lines.
 *
 * When nothing is left to read, or the mapping cannot be created,
 * file_start_pos is left unchanged and file_end_pos is set equal to it, so a
 * caller can detect "no progress".
 *
 * Differences from the earlier implementation:
 *  - the chunk offset is passed to MapViewOfFile as the view offset; before,
 *    it was passed to CreateFileMapping as the maximum mapping size and the
 *    view was always mapped at offset 0, i.e. the start of the file;
 *  - the view offset is rounded down to the allocation granularity, which
 *    MapViewOfFile requires;
 *  - the mapping is read-only, so it can never grow the input file;
 *  - the mapping handle is closed, and API failures are checked.
 *
 * @param hFile  Open handle of the input file (needs read access).
 */
void getFilePointer(HANDLE& hFile)
{
    assert(hFile != NULL && hFile != INVALID_HANDLE_VALUE);

    LARGE_INTEGER file_size;
    if (!::GetFileSizeEx(hFile, &file_size) || file_start_pos >= file_size.QuadPart)
    {
        file_end_pos = file_start_pos; // nothing left, or size unknown
        return;
    }

    // View offsets must be multiples of the allocation granularity: map from
    // the nearest boundary below file_start_pos and skip the extra lead bytes.
    SYSTEM_INFO sys_info;
    ::GetSystemInfo(&sys_info);
    const _int64 granularity = sys_info.dwAllocationGranularity;
    const _int64 view_offset = (file_start_pos / granularity) * granularity;
    const _int64 lead = file_start_pos - view_offset;

    _int64 chunk_length = file_size.QuadPart - file_start_pos;
    if (chunk_length > read_byte_count)
    {
        chunk_length = read_byte_count;
    }

    // Size 0/0 makes the mapping object cover the file at its current size.
    HANDLE hMap = ::CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
    if (hMap == NULL)
    {
        file_end_pos = file_start_pos;
        return;
    }

    const DWORD offset_high = static_cast<DWORD>(view_offset >> 32);
    const DWORD offset_low = static_cast<DWORD>(view_offset & 0xFFFFFFFF);
    LPVOID lpView = ::MapViewOfFile(hMap, FILE_MAP_READ, offset_high, offset_low,
                                    static_cast<SIZE_T>(lead + chunk_length));
    if (lpView == NULL)
    {
        ::CloseHandle(hMap);
        file_end_pos = file_start_pos;
        return;
    }

    // Provisional end of the chunk: the end of the mapped bytes.
    file_end_pos = file_start_pos + chunk_length;

    LPVOID lpBase = static_cast<char*>(lpView) + lead;
    rePosition(lpBase); // may move file_end_pos
    proc_str(lpBase);   // handles [file_start_pos, file_end_pos)

    ::UnmapViewOfFile(lpView);
    ::CloseHandle(hMap);
    file_start_pos = file_end_pos;
}
int main()
{
add(5, 6);
file_start_pos = 0;
file_end_pos = 0;
HANDLE hFile = ::CreateFileA(FILENAME, GENERIC_ALL, FILE_SHARE_READ, NULL, 0, FILE_ATTRIBUTE_NORMAL, NULL);
for (int i = 0; i < 5; i++)
{
getFilePointer(hFile);
}
CloseHandle(hFile);
hash_map hash_obj;
hash_map::iterator itr;
//最后开始对0-9的txt文档
for (int i = 0; i < 10; i++)
{
hash_obj.clear();
char buf[20] = { 0 };
sprintf(buf, "%d", i);
ifstream in_txt;
in_txt.open(buf);
//开始对每一个文件的查询字符串进行统计频次
char con_buf[256] = { 0 };
in_txt.getline(con_buf,256);
itr = hash_obj.find(con_buf);
if (itr == hash_obj.end())
{
//不存在这个查询字符串
pair tmp;
tmp.first = con_buf;
tmp.second = 1;
hash_obj.insert(tmp);
}
else
{
//存在查询字符串,修改当前频度
itr->second++;
}
in_txt.close();
//修改完成之后将所有hash_map中的内容放在结果txt当中
//使用堆排序进行完成,放在i对应的result_i.txt 当中
int ele_count = hash_obj.size();
itr = hash_obj.begin();
hash_map::iterator the_max;
the_max = hash_obj.begin();
//将前10个
for (int j = 0; j<10; j++)
{
for (itr=hash_obj.begin(); itr != hash_obj.end(); itr++)
{
if (itr->second >(the_max->second))
{
the_max = itr;
}
}
//将记录放在result_i.txt当中
ofstream result_i;
char result_txt[20];
sprintf(result_txt, "result_%d.txt", j);
result_i.open(result_txt, ios::app);
result_i.write(the_max->first,strlen(the_max->first));
result_i.write("\n", strlen("\n"));
char mm[5] = { 0 };
sprintf(mm, "%d", the_max->second);
result_i.write(mm,strlen(mm));
result_i.write("\n", strlen("\n"));
result_i.close();
hash_obj.erase(the_max);
}
}
//最后从result_0~result_9这10个文件中读取出前十个
hash_map result_hash;
for (int i = 0; i < 10; i++)
{
char file_name[20] = { 0 };
sprintf(file_name, "result_i", i);
ifstream in_txt;
in_txt.open(file_name);
char buf[256] = { 0 };
in_txt.getline(buf, 256);
char uh[10] = { 0 };
in_txt.getline(uh, 10);
int value = atoi(uh);
pair mm;
mm.first = buf;
mm.second = value;
result_hash.insert(mm);
}
//最后根据冒泡法,进行前十的输出
itr = result_hash.begin();
hash_map::iterator the_max;
for (int i = 0; i < 10; i++)
{
the_max = result_hash.begin();
for (itr = result_hash.begin(); itr != result_hash.end(); itr++)
{
if (the_max->second < itr->second)
{
the_max = itr;
}
}
ofstream result;
result.open("result.txt", ios::app);
result.write(the_max->first, strlen(the_max->first));
result.write("\n", strlen("\n"));
char buf[20] = { 0 };
sprintf(buf, "%d", the_max->second);
result.write(buf, 20);
result.write("\n", strlen("\n"));
result.close();
}
}