1、每一次只能读取同一种数据类型,不能读取字符串。
2、每次读取会返回一个 CSV 数据结构,其中包含源数据、二维数组以及行列数信息
3、可以转换二维数组,但总大小不能变
123.csv
Month,"CO2 (ppm) mauna loa, 1965-1980",,,CO2 (ppm) mauna loa
Jan-65,319.32,1,,,
Feb-65,320.36,2,,,
Mar-65,320.82,3,,,
Apr-65,322.06,4,,,
May-65,322.17,5,,,
Jun-65,321.95,6,,,
头文件 read_csv_data.h
#include <errno.h>  // for errno, ENOENT
#include <limits.h> // for INT_MAX
#include <stdio.h>  // for FILE, fopen, fgets, printf
#include <stdlib.h> // for malloc, free, strtol, strtoul, strtod
#include <string.h> // for memset, strdup
// Maximum number of bytes fgets reads per call (size of the line buffers)
#define MAX_LINE_SIZE 1024
// Result of splitting a string on a delimiter
struct str_split
{
    int    count;      // number of entries in str_array
    char  *str;        // private copy of the input; delimiters are overwritten with '\0'
    char **str_array;  // one pointer per field, each pointing into str
};
// Variant cell value: every member shares the same storage
typedef union dtype_u {
    long           s32;  // signed long; despite the name its width is that of long on the target
    char           s8;
    short          s16;
    unsigned char  u8;
    unsigned short u16;
    unsigned long  u32;  // unsigned long; width is platform dependent as well
    float          f32;
    double         f64;
} Dtype;
// In-memory form of the numeric contents of a CSV file
struct csv_s {
    Dtype **darray;  // row pointers into data[], giving a drow x dcol two-dimensional view
    int     drow;    // number of rows in the current view
    int     dcol;    // number of columns in the current view
    int     dnum;    // total number of elements
    Dtype   data[];  // flat element storage (flexible array member)
};
// Selectors naming the Dtype member a value is converted to
enum {
    S8  = 0,
    S16 = 1,
    S32 = 2,
    U8  = 3,
    U16 = 4,
    U32 = 5,
    F32 = 6,
    F64 = 7,
    DEF = S32  // default element type
};
/*
Release a CSV structure: frees its row-pointer table and the structure
itself, then stores NULL in *csv_ptr. See testFile() for usage.
*/
void FreeCsvData(struct csv_s **csv_ptr);
/*
Re-shape the two-dimensional view of the CSV data to row x col, building a
new row-pointer table. row * col must equal tt->dnum.
Returns 0 on success, -1 if tt is NULL, -2 if the requested shape does not
match the element count, -3 if the table cannot be allocated.
See testFile() for usage.
*/
int setNewArray( struct csv_s *tt, int row, int col);
/*
Read data from a CSV file and build a CSV structure. See testFile() for usage.
csvFilePath: path of the file
delimiter:   field separator
skiprows:    number of leading lines to skip
ColumnList:  comma-separated list of the columns to extract, counted from
             column 0 (e.g. "2,1"); NULL selects every column
dclass:      element type (one of S8, S16, S32, U8, U16, U32, F32, F64, DEF)
Returns the new structure, or NULL on failure.
*/
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass);
源码 read_csv_data.c
#include "read_csv_data.h"
/*
 * Split src on every occurrence of delimiter.
 *
 * split is zeroed first. On success split->str holds a private copy of src in
 * which each delimiter has been replaced by '\0', split->str_array points at
 * the start of each field inside that copy, and split->count is the number of
 * fields (delimiters + 1). If src is NULL or empty, or an allocation fails,
 * split->count stays 0. The function always returns 0; release the result
 * with str_split_free().
 */
int str_split_func(struct str_split *split, char * src, char delimiter)
{
    if (split == NULL) {
        return 0;
    }
    memset(split, 0, sizeof *split);
    if (src == NULL || *src == '\0') {
        return 0;
    }

    char *copy = strdup(src);
    split->str = copy;
    if (copy == NULL) {
        return 0;
    }

    // One field, plus one more for every delimiter found in the input
    int fields = 1;
    for (const char *scan = src; *scan != '\0'; scan++) {
        if (*scan == delimiter) {
            fields++;
        }
    }

    char **slots = malloc(fields * sizeof *slots);
    split->str_array = slots;
    if (slots == NULL) {
        return 0;
    }
    split->count = fields;

    // Cut the copy in place, recording where each field starts
    int filled = 0;
    slots[filled++] = copy;
    for (char *cursor = copy; *cursor != '\0' && filled < fields; cursor++) {
        if (*cursor == delimiter) {
            *cursor = '\0';
            slots[filled++] = cursor + 1;
        }
    }
    return 0;
}
/*
 * Release the buffers owned by split and reset both pointers to NULL.
 * A NULL split is accepted. Always returns 0.
 */
int str_split_free(struct str_split *split)
{
    if (split != NULL) {
        // free(NULL) is a no-op, so neither pointer needs its own check
        free(split->str);
        split->str = NULL;
        free(split->str_array);
        split->str_array = NULL;
    }
    return 0;
}
/*
 * Size of the file behind fp, in bytes.
 *
 * Returns -1 if fp is NULL, if seeking or querying the position fails, or if
 * the size does not fit in an int. The stream is left positioned at the end
 * of the file.
 */
int GetTotalSize(FILE * fp)
{
    long size;

    if (fp == NULL) {
        return -1;
    }
    if (fseek(fp, 0, SEEK_END) != 0) {
        return -1;
    }
    size = ftell(fp);
    // ftell reports failure as -1; larger files must not be silently truncated
    if (size < 0 || size > INT_MAX) {
        return -1;
    }
    return (int)size;
}
/*
 * Count the lines of the file behind fp.
 *
 * The stream is rewound before and after counting. The count is the number of
 * successful fgets calls with a MAX_LINE_SIZE buffer, so a line longer than
 * the buffer is counted once per chunk.
 * Returns -1 if fp is NULL.
 */
int GetTotalLineCount(FILE * fp)
{
    char strLine[MAX_LINE_SIZE];
    int lines = 0;

    if (fp == NULL) {
        return -1;
    }
    fseek(fp, 0, SEEK_SET);
    while (fgets(strLine, MAX_LINE_SIZE, fp) != NULL) {
        lines++;
    }
    fseek(fp, 0, SEEK_SET);
    return lines;
}
/*
 * Count the columns of the file behind fp, using its first line as reference.
 *
 * The stream is rewound, one line is read and split on delimiter; the stream
 * is left positioned after that line.
 * Returns the number of fields, 0 if no line could be read, -1 if fp is NULL.
 */
int GetTotalColCount(FILE * fp, char delimiter)
{
    char strLine[MAX_LINE_SIZE];
    struct str_split tss;
    int columns = 0;

    if (fp == NULL) {
        return -1;
    }
    fseek(fp, 0, SEEK_SET);
    if (fgets(strLine, MAX_LINE_SIZE, fp) != NULL) {
        str_split_func(&tss, strLine, delimiter);
        columns = tss.count;
        str_split_free(&tss);
    }
    return columns;
}
/*
 * Convert the text in str to the numeric type selected by dtpye and store it
 * in the matching member of *d.
 *
 * d:     destination cell
 * str:   text to convert (decimal for the integer types)
 * dtpye: one of S8, S16, S32, U8, U16, U32, F32, F64
 *
 * Returns 0 on success, -1 if d or str is NULL or dtpye is not a known type.
 * Text that does not start with a number is stored as 0; no error is reported
 * for it.
 */
int strToData(Dtype *d, char *str, int dtpye)
{
    if (d == NULL || str == NULL) {
        return -1;
    }
    switch (dtpye) {
    case S8:
        d->s8 = (char)strtol(str, NULL, 10);
        break;
    case S16:
        d->s16 = (short int)strtol(str, NULL, 10);
        break;
    case S32:
        d->s32 = strtol(str, NULL, 10);
        break;
    case U8:
        d->u8 = (unsigned char)strtol(str, NULL, 10);
        break;
    case U16:
        d->u16 = (unsigned short int)strtol(str, NULL, 10);
        break;
    case U32:
        // strtoul covers the whole unsigned long range; strtol would
        // saturate at LONG_MAX for the upper half
        d->u32 = strtoul(str, NULL, 10);
        break;
    case F32:
        d->f32 = (float)strtod(str, NULL);
        break;
    case F64:
        d->f64 = strtod(str, NULL);
        break;
    default:
        printf("读取数据类型不对\n");
        return -1;
    }
    return 0;
}
/*
 * Re-shape the two-dimensional view of a CSV structure.
 *
 * Builds a new table of row pointers into tt->data so that tt->darray[r][c]
 * addresses element r * col + c, then replaces the previous table.
 *
 * tt:  structure to re-shape
 * row: new number of rows (must be positive)
 * col: new number of columns (must be positive)
 *
 * Returns 0 on success,
 *        -1 if tt is NULL,
 *        -2 if row or col is not positive or row * col differs from tt->dnum,
 *        -3 if the row-pointer table cannot be allocated.
 * On failure tt is left unchanged.
 */
int setNewArray( struct csv_s *tt, int row, int col)
{
    int i;
    Dtype **darray;

    // The structure must exist
    if (tt == NULL) {
        return -1;
    }
    // The new shape must cover exactly the existing elements; dividing
    // instead of multiplying keeps the comparison free of int overflow
    if (row <= 0 || col <= 0 || tt->dnum % col != 0 || tt->dnum / col != row) {
        return -2;
    }
    darray = malloc((size_t)row * sizeof *darray);
    if (darray == NULL) {
        return -3;
    }
    free(tt->darray);
    tt->drow = row;
    tt->dcol = col;
    tt->dnum = row * col;
    tt->darray = darray;
    for (i = 0; i < row; i++) {
        darray[i] = tt->data + (size_t)i * (size_t)col;
    }
    return 0;
}
/*
 * Release a CSV structure and reset the caller's pointer.
 *
 * Frees the row-pointer table and the structure *csv_ptr points to, then
 * stores NULL in *csv_ptr. A NULL csv_ptr or a NULL *csv_ptr is accepted.
 */
void FreeCsvData(struct csv_s **csv_ptr)
{
    if (csv_ptr == NULL) {
        return;
    }
    if (*csv_ptr != NULL) {
        free((*csv_ptr)->darray);
        free(*csv_ptr);
    }
    *csv_ptr = NULL;
}
// 从csv文件中读取数据
struct csv_s *ReadCsvData(char* csvFilePath, char delimiter, int skiprows, char *ColumnList, int dclass)
{
/*
delimiter:分割符
dtype:数据类型,多个数据类型使用逗号分割,从第一列到最后一列开始一一对应。默认其他的是float 类型。
例:有5列 dtype=“str, str, str” 则前三列为string类型,后两列为float类型
skiprows:跳过开头N行
ColumnList:取N列数据。
例:"2,1,3",取第 2,1,3列数据返回
*/
FILE* fCsv=NULL;
int rowTotal=0;
int colTotal=0;
struct str_split tss;
struct csv_s *temp_csv=NULL;
int cur_ptr=0;
char strLine[MAX_LINE_SIZE];
int i;
int j;
//获取指定列
int t_column_list=0;
int *t_column_list_ptr=NULL;
if(access(csvFilePath, 0)<0){
printf("%s 文件不存在\n", csvFilePath);
goto label_error;
}
// 打开文件
fCsv = fopen( csvFilePath, "r" );
if( fCsv==NULL )
{
printf("open file %s failed",csvFilePath);
goto label_error;
}
rowTotal = GetTotalLineCount(fCsv);
colTotal = GetTotalColCount(fCsv, delimiter);
//获取指定列
if(ColumnList==NULL){
t_column_list=colTotal;
t_column_list_ptr = malloc(sizeof(t_column_list) * t_column_list);
for(i=0;ielse{
str_split_func(&tss, ColumnList, ',');
t_column_list = tss.count;
t_column_list_ptr = malloc(sizeof(t_column_list) * t_column_list);
for(i=0;i10);
}
str_split_free(&tss);
//检查参数ColumnList
if (t_column_list>colTotal){
printf("[%s] 超过最大列数 %d\n", ColumnList, colTotal);
goto label_error;
}
for(i=0; iif(t_column_list_ptr[i] > colTotal){
printf("[%s] 超过最大列数 %d\n", ColumnList, colTotal);
goto label_error;
}
}
}
//检查是否超过,最大列数
if(rowTotal <1 || colTotal <1 || skiprows>rowTotal){
printf("数据不对,有%d行,%d列\n", rowTotal, colTotal);
goto label_error;
}
//分配内在空间
rowTotal = rowTotal - skiprows;
temp_csv = (struct csv_s *)malloc(sizeof(struct csv_s) + (rowTotal * t_column_list)*sizeof(Dtype));
if(!temp_csv){
printf("分配内存失败 \n");
goto label_error;
}
// 读取数据
i = skiprows;
fseek(fCsv, 0, SEEK_SET);
//跳过开头N行
while(i>0 && fgets(strLine,MAX_LINE_SIZE,fCsv) )i--;
i = rowTotal;
while( i>=0 && fgets(strLine,MAX_LINE_SIZE,fCsv)){
//printf("%d %s\n", i, strLine);
i--;
str_split_func(&tss, strLine, delimiter);
if(t_column_list > tss.count){
printf("read error\n");
goto label_error;
}
int t_res = 0;
for(j=0; jdata+cur_ptr, tss.str_array[t_column_list_ptr[j]], dclass);
if(t_res < 0){
printf("str to data error\n");
goto label_error;
}
cur_ptr++;
}
str_split_free(&tss);
}
temp_csv->drow = rowTotal;
temp_csv->dcol = t_column_list;
temp_csv->dnum = rowTotal * t_column_list;
//printf("%d %d %d\n", i, rowTotal, t_column_list);
//for(i=0;idnum; i++)
// printf("%d ", temp_csv->data[i].s32);
//temp_csv->dtype = dtype;
temp_csv->darray=NULL;
setNewArray(temp_csv,temp_csv->drow, temp_csv->dcol);
// 关闭文件
fclose(fCsv);
free(t_column_list_ptr);
return temp_csv;
label_error:
if(fCsv)fclose(fCsv);
if(temp_csv)free(temp_csv);
if(t_column_list_ptr)free(t_column_list_ptr);
return NULL;
}
/*
 * Exercise setNewArray() on a structure built in memory (no file involved):
 * fills the tail of a 1000-element buffer, views it as 500 x 2 and then as
 * 100 x 10, printing the last row each time. Always returns 0.
 */
int testData()
{
    int size = 1000;
    int i;
    int j;
    int row;
    int col;
    // Allocate the structure together with its element storage
    struct csv_s *stdata = malloc(sizeof *stdata + size * sizeof(Dtype));

    if (stdata == NULL) {
        printf("分配内存失败 \n");
        return 0;
    }
    // setNewArray frees the previous row table, so it must start out as NULL
    stdata->darray = NULL;

    // Initialise the elements that are printed below
    for (i = 900; i < size; i++) {
        stdata->data[i].f64 = i * 1.0;
    }
    // Spot check of a single element
    stdata->data[1].f64 = 10 * 1.0;
    printf("%f asdfasfasdf\n", stdata->data[1].f64);

    // Initialise the shape
    row = 500;
    col = 2;
    stdata->drow = row;
    stdata->dcol = col;
    stdata->dnum = row * col;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray %d error\n", j);
        FreeCsvData(&stdata);
        return 0;
    }
    // Show the last row of the initial shape
    printf("显示定义结构最后一行数据 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row-1][j].f64);
    }

    // Re-shape
    row = 100;
    col = 10;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
        FreeCsvData(&stdata);
        return 0;
    }
    // Show the last row of the new shape
    printf("显示结构转换最后一行数据 \n");
    for (j = 0; j < col; j++) {
        printf("%f\n", stdata->darray[row-1][j].f64);
    }

    FreeCsvData(&stdata);
    return 0;
}
/*
 * Floating-point test: reads columns 2 and 1 of 123.csv as F32 (skipping one
 * header line), prints the last row, re-shapes to 3 x 4, then attempts a
 * 3 x 2 re-shape that is expected to be rejected, and finally frees the data.
 */
void testFile()
{
    struct csv_s *stdata;
    int j;
    int row;
    int col;

    //stdata = ReadCsvData("123.csv", ',', 1, "1,2", F32);
    stdata = ReadCsvData("123.csv", ',', 1, "2,1", F32);
    if (stdata == NULL) {
        // ReadCsvData has already printed the reason
        printf("ReadCsvData failed\n");
        return;
    }
    printf("testFile row %d col %d\n", stdata->drow, stdata->dcol);

    // darray is a two-dimensional view of stdata->drow rows by stdata->dcol columns
    Dtype **show = stdata->darray;
    printf("显示最后一行数据 \n");
    if (show != NULL && stdata->drow > 0) {
        for (j = 0; j < stdata->dcol; j++) {
            printf("%f\n", show[stdata->drow-1][j].f32);
        }
    }

    // Re-shape
    row = 3;
    col = 4;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    } else {
        // Show the last row of the new shape
        printf("显示结构转换最后一行数据 \n");
        for (j = 0; j < col; j++) {
            printf("%f\n", stdata->darray[row-1][j].f32);
        }
    }

    printf("转换失败测试\n");
    // Re-shape to a size that does not match the element count
    row = 3;
    col = 2;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    }

    // Release the memory
    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}
/*
 * Integer test: same sequence as testFile(), but reads 123.csv with the
 * default element type (DEF, i.e. the long member s32).
 */
void testFile2()
{
    struct csv_s *stdata;
    int j;
    int row;
    int col;

    //stdata = ReadCsvData("123.csv", ',', 1, "1,2", F32);
    stdata = ReadCsvData("123.csv", ',', 1, "2,1", DEF);
    if (stdata == NULL) {
        // ReadCsvData has already printed the reason
        printf("ReadCsvData failed\n");
        return;
    }
    printf("testFile row %d col %d\n", stdata->drow, stdata->dcol);

    // darray is a two-dimensional view of stdata->drow rows by stdata->dcol columns.
    // Note: the default element type is long int, so the s32 member is printed with %ld
    Dtype **show = stdata->darray;
    printf("显示最后一行数据 \n");
    if (show != NULL && stdata->drow > 0) {
        for (j = 0; j < stdata->dcol; j++) {
            printf("%ld\n", show[stdata->drow-1][j].s32);
        }
    }

    // Re-shape
    row = 3;
    col = 4;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    } else {
        // Show the last row of the new shape
        printf("显示结构转换最后一行数据 \n");
        for (j = 0; j < col; j++) {
            printf("%ld\n", stdata->darray[row-1][j].s32);
        }
    }

    printf("转换失败测试\n");
    // Re-shape to a size that does not match the element count
    row = 3;
    col = 2;
    j = setNewArray(stdata, row, col);
    if (j < 0) {
        printf("setNewArray 转换 %d error\n", j);
    }

    // Release the memory
    FreeCsvData(&stdata);
    if (stdata == NULL)
        printf("ok t is null\n");
    else
        printf("ok t is not null\n");
}
/*
 * Entry point: runs the file-based tests. Command-line arguments are unused.
 */
int main(int args, char *argv[])
{
    (void)args;
    (void)argv;
    // testData();
    testFile();
    testFile2();
    return 0;
}