基于状态机的 CSV 文件数据解析

#include <afxwin.h>   // MFC core: CString, CStringArray, ASSERT, TRACE
#include <afxtempl.h> // CTypedPtrArray
#include <stdio.h>    // FILE, fgets, fclose
#include <tchar.h>    // _tfopen, _T
#ifndef CStringA
#define CStringA CString
#endif //CStringA

//解析CSV行数据核心代码
//chLine 输入行
//szDataArray 子项解析输出
//返回值:子项个数
int AnalysCSVDataLine(LPCSTR chLine, CStringArray &szDataArray)
{
  ASSERT(chLine != NULL);

  //先清理输出项
  szDataArray.RemoveAll();

  //子项状态
  enum _tagItemType_t
  {
    _Invalid_Item, //无效项
      _Comma_Item, //逗号分隔项
      _Quota_Item, //引号分隔项
  };
  
  //子项状态初始化为无效
  _tagItemType_t eItem = _Invalid_Item;
  //子项数据字符缓存区
  CStringA szSubData;
  //行长度
  int iLineLen = (int)strlen(chLine);
    
  //解析子项
  for(int iPos=0; iPos &szDataArray)
{
  FILE *fp = NULL;
  if((fp = _tfopen(szFile, _T("rt"))) != NULL)
  { 
    while(!feof(fp))
    {
      //行字符缓存区
      CHAR chLine[8192];
      memset(chLine, 0, sizeof(chLine));

      //读行
      if(fgets(chLine, sizeof(chLine), fp) == NULL)
        break;
      
      //解析行
      CStringArray szDataLines;
      if(AnalysCSVDataLine(chLine, szDataLines) > 0)
      {
        //追加行
        CStringArray *pStrA = new CStringArray;
        pStrA->Copy(szDataLines);
        szDataArray.Add(pStrA);
      }
    }
    
    //关闭文件
    fclose(fp);
  }  

  return (INT)szDataArray.GetSize();
}


 

//调用示例
void CDlg3Dlg::OnButton1()
{
  //打开文件
  CFileDialog dlg(TRUE, _T(".csv"), NULL, 0, 
    _T("csv files(*.csv)|*.csv||"), this);
  if(dlg.DoModal() == IDOK)
  {
    //解析文件
    CTypedPtrArray sData;
    AnalsysCSVDataFile(dlg.GetPathName(), sData);

    //输出内容
    for(INT_PTR iLine=0; iLineGetSize();
      TRACE(_T("Line%d==>(%d)==>"), iLine, iMaxItem);
      for(INT iItem=0; iItemGetAt(iItem);
        TRACE(_T("[%s]"), sItem);
      }
      TRACE(_T("\n"));
    }

    //结束清理
    while(sData.GetSize() > 0)
    {
      delete sData.GetAt(0);
      sData.RemoveAt(0);
    }
  }
}

 

//测试用csv文件内容

基于状态机的 CSV 文件数据解析_第1张图片

1,"""2""","3,ABC",4,"""""",",",""",""",,"aaa"",""bbb"
1,"""2""","3,ABC",4,"""""",",",""",""",,"aaa""??bbb"""
1,"""2""","3,ABC",4,"""""",",",""",""",,"aaa""??bbb"""

//调试输出

Line0==>(9)==>[1]["2"][3,ABC][4][""][,][","][][aaa","bbb]
Line1==>(9)==>[1]["2"][3,ABC][4][""][,][","][][aaa"??bbb"]
Line2==>(9)==>[1]["2"][3,ABC][4][""][,][","][][aaa"??bbb"]

 

你可能感兴趣的:(对话框/属性页,算法,系统/线程/调试)