因为工作关系,需要从大文件中筛选数据并进行比对。文件大小一般都在6GB左右。
读取大文件有如下两种方法:一是用fopen打开文件,用fgets(或getline)循环逐行读取,最后用fclose关闭文件;二是用open函数打开文件,用lseek获取文件大小,用mmap对大文件做内存映射,处理完后用munmap解除内存映射,再用close关闭文件描述符。方式一较慢,就不再详细描述,下面主要描述方式二。
方式二,网上介绍也有很多,但是鲜有介绍文件大于4G时的读取方法。在32位环境下,用long型(32位)保存文件大小时,按无符号计算最多只能表示4294967295个字节,也就是4G(有符号的long只能表示到2G)。解决方法是用long long(64位)来保存文件的大小。
样例代码如下:
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace std;
#include "dlist.h"
/*
 * Call-flow record.
 *   identifier - identifier string of the flow (NUL-terminated, max 99 chars)
 *   billingid  - billing id string of the flow (NUL-terminated, max 199 chars)
 *   response   - false until the flow has been marked as answered
 * A default-constructed record has two empty strings and response == false.
 */
typedef struct s_callflow
{
    char identifier[100];
    char billingid[200];
    bool response;

    s_callflow() : response(false)
    {
        /* An empty C string only needs its terminator in the first byte. */
        identifier[0] = '\0';
        billingid[0] = '\0';
    }
} callflow;
/*
 * Allocate a call-flow record on the heap and fill it in.
 *
 * identifier, billingid - NUL-terminated strings to store; a NULL pointer is
 *                         stored as an empty string. Input longer than the
 *                         destination field (99 / 199 characters) is
 *                         truncated instead of overflowing the record.
 * response              - initial value of the response flag.
 *
 * Returns the new record, or NULL when malloc() fails. The caller owns the
 * record and releases it with free().
 */
callflow *call_instance(char *identifier,char *billingid ,bool response)
{
    callflow *call_ptr = (callflow *)malloc(sizeof *call_ptr);
    if( call_ptr==NULL )
        return NULL;

    /* snprintf always NUL-terminates and never writes past the given size. */
    snprintf(call_ptr->identifier,sizeof call_ptr->identifier,"%s",
             identifier!=NULL ? identifier : "");
    snprintf(call_ptr->billingid,sizeof call_ptr->billingid,"%s",
             billingid!=NULL ? billingid : "");
    call_ptr->response = response;
    return call_ptr;
}
/*
 * Mark the most recently inserted, still unanswered call flow whose
 * identifier equals `identifier` as answered.
 *
 * The list is searched from the tail towards the head; at most one record is
 * changed. A NULL list is ignored.
 */
void updatecall(DList *list,char *identifier)
{
    if(list==NULL)
        return;

    for(DListElmt *node=list->tail; node!=NULL; node=node->prev)
    {
        callflow *entry = (callflow *)node->data;
        if(entry!=NULL
           && strcmp(entry->identifier,identifier)==0
           && !entry->response)
        {
            entry->response = true;
            return;
        }
        /* Stop once the head has been examined, whatever its prev link holds. */
        if(node==list->head)
            return;
    }
}
/*
 * Release one list payload with free(). main() passes this function to
 * dlist_init() as the list's destroy callback.
 */
void destroy(void *data)
{
    free(data);
}
/*
 * Write the report for one call-flow list.
 *
 * list    - list whose elements carry callflow records; NULL is ignored.
 * calling - true writes "calling.txt", false writes "called.txt".
 *
 * Every record that has not been answered is written as one
 * "billingid=... identifier=..." line; the last line holds the list size and
 * the number of answered records. If the report file cannot be opened the
 * error is reported on stderr and nothing is written.
 */
void output(DList *list,bool calling)
{
    if(list==NULL) return;

    const char *path = calling ? "calling.txt" : "called.txt";
    FILE *out = fopen(path,"w");
    if(out==NULL)
    {
        perror(path);
        return;
    }

    int count = list->size;
    int response = 0;
    for(DListElmt *element=list->head; element!=NULL; element=element->next)
    {
        callflow *flow = (callflow *)element->data;
        if(flow==NULL)
            continue;
        if(flow->response)
        {
            response++;
        }
        else
        {
            /* Formatting straight into the stream avoids a fixed-size
               scratch buffer that long ids could overflow. */
            fprintf(out,"billingid=%s identifier=%s\n",flow->billingid,flow->identifier);
        }
    }
    fprintf(out,"count=%d response=%d\n",count,response);

    /* fclose flushes buffered data, so a failure here means a short report. */
    if(fclose(out)!=0)
        perror(path);
}
//
int main(int argc,char * argv[])
{
//size_t lsize=0;
long long lsize=0;
const char *localpc="16592304";
DList calling_node;
DList called_node;
dlist_init(&calling_node, destroy);
dlist_init(&called_node, destroy);
char opc[3][200];
char dpc[3][200];
char identifier[2][100];
char billingID[2][200];
//FILE * fp=NULL;
//fp = fopen(argv[1],"r");
char *pBuffer=NULL;
char *pStart=NULL,*pEnd=NULL;
int fd = open(argv[1],O_RDONLY);
//size_t nFileSize=0;
//size_t nOffset=0;
//size_t nLineAmount=0;
//struct stat fileState;
//fstat(fd,&fileState);
//nFileSize=fileState.st_size;
long long nFileSize=0;
long long nOffset;
long long nLineAmount;
nFileSize =(long long)lseek(fd,0,SEEK_END);
//nFileSize = (unsigned long)fileState.st_size;
pBuffer=(char *)mmap(NULL,nFileSize,PROT_READ,MAP_SHARED,fd,0);
pEnd=pStart=pBuffer;
char line[2048];
int flag;
flag=-1;
int in=-1;
printf("nFileSize=%lld\n",nFileSize);
int load = 0;
int preload = 0;
while(lsize
lsize++;
load = (int) (((float)lsize / (float)nFileSize) * 100);
if(preload!=load)
{
printf("%3d \n",load);
preload = load;
sleep(1);
printf("\b");
}
char a = *pEnd;
if(a==13)
{
char b = *(++pEnd);
if(b==10)
{
int len = pEnd-pStart;
if(len<1) break;
if(len>2047) break;
strncpy(line,pStart,len);
pStart=pEnd+1;
if(strstr(line," OPC")!=NULL)
{
sscanf(line,"%s%s%s",opc[0],opc[1],opc[2]);
}
else if(strstr(line," DPC")!=NULL)
{
sscanf(line,"%s%s%s",dpc[0],dpc[1],dpc[2]);
}
else if(strstr(line,"queryWithPerm")!=NULL)
{
flag=0; //
}
else if(strstr(line,"response")!=NULL)
{
flag=1; //
}
else if(strstr(line,"identifier")!=NULL)
{
sscanf(line,"%s%s",identifier[0],identifier[1]);
//
if(strcmp(opc[1],localpc)==0)
{
//
if(flag==1) //response
{
//找主叫流程
updatecall(&calling_node,identifier[1]);
//找被叫流程
updatecall(&called_node,identifier[1]);
flag=-1;
}
}
}
else if(strstr(line,"originationRequest")!=NULL)
{
//主叫流程
if(strstr(line,"originationRequestRes")==NULL)
{
in = 1;
}
}
else if(strstr(line,"billingID")!=NULL)
{
sscanf(line,"%s%s",billingID[0],billingID[1]);
if(in==1)
{
callflow * calling = call_instance(identifier[1],billingID[1] ,false);
dlist_ins_next(&calling_node,calling_node.tail,(void *)calling);
in = -1;
}
}
else if(strstr(line,"initial-Termination (38)")!=NULL)
{
//被叫流程
callflow * called = call_instance(identifier[1],billingID[1] ,false);
dlist_ins_next(&called_node,called_node.tail,(void *)called);
}
}
}
else
{
pEnd++;
}
}
output(&calling_node,true);
output(&called_node,false);
dlist_destroy(&calling_node);
dlist_destroy(&called_node);
munmap(pBuffer,nFileSize);
//fclose(fp);
close(fd);
}
dlist.h
/*
* filename: dlist.h
* author: zhm
* date: 2012-12-08
*/
#ifndef _DLIST_H
#define _DLIST_H
#include
/*
 * One element (node) of the doubly linked list.
 * The dlist_is_head / dlist_is_tail macros of this header treat a NULL
 * prev / next link as "no neighbour in that direction".
 */
typedef struct DListElmt_
{
/* caller-supplied payload pointer; the element does not know its type */
void *data;
/* neighbouring element towards the head */
struct DListElmt_ *prev;
/* neighbouring element towards the tail */
struct DListElmt_ *next;
}DListElmt;
/*
 * Header of a doubly linked list.
 * NOTE(review): the implementation file is not part of this header, so how
 * size and destroy are maintained/used should be confirmed there.
 */
typedef struct DList_
{
/* element count, read through dlist_size() */
int size;
/* callback registered through dlist_init(); presumably applied to each
   element's data when the list is destroyed -- TODO confirm */
void (*destroy)(void *data);
/* first element, read through dlist_head() */
DListElmt *head;
/* last element, read through dlist_tail() */
DListElmt *tail;
}DList;
/* define public interface */
/*
 * NOTE(review): the implementations are not part of this header; the
 * descriptions below follow the names and signatures only. Return-value
 * conventions of the int-returning functions must be confirmed against the
 * implementation file.
 */

/* Prepare `list` for use and register `destroy` as its data callback. */
void dlist_init(DList *list, void (*destroy)(void *data));

/* Tear down `list`. */
void dlist_destroy(DList *list);

/* Insert `data` as a new element next to `element`, on its prev side. */
int dlist_ins_prev(DList *list, DListElmt *element, const void *data);

/* Insert `data` as a new element next to `element`, on its next side. */
int dlist_ins_next(DList *list, DListElmt *element, const void *data);

/* Unlink `element` from `list`; its payload is handed back through `data`. */
int dlist_remove(DList *list, DListElmt *element, void **data);
/* Accessors for the list header. */
/* number of elements recorded in the list */
#define dlist_size(lst) ((lst)->size)
/* first element of the list */
#define dlist_head(lst) ((lst)->head)
/* last element of the list */
#define dlist_tail(lst) ((lst)->tail)

/* Predicates on a single element: evaluate to 1 or 0. */
/* 1 when the element has no predecessor */
#define dlist_is_head(elmt) ((elmt)->prev != NULL ? 0 : 1)
/* 1 when the element has no successor */
#define dlist_is_tail(elmt) ((elmt)->next != NULL ? 0 : 1)

/* Accessors for a single element. */
/* payload pointer of the element */
#define dlist_data(elmt) ((elmt)->data)
/* neighbouring element towards the head */
#define dlist_prev(elmt) ((elmt)->prev)
/* neighbouring element towards the tail */
#define dlist_next(elmt) ((elmt)->next)
#endif