分词DLL导出小结

今天整理一下以前写的代码。它使用了计算所的 ICTCLAS 分词 API,这个 DLL 的任务有三个:
  1. 输入句子,导出结构体,结构体包含了句子中的专有名词,动词,动宾结构
  2. 对句子进行分词
  3. 添加词典
DLL 要用动态加载(LoadLibrary / GetProcAddress)的方式来调用,这样内存才不会一直增长。本 DLL 在被调用时又会去加载并调用 ICTCLAS50.dll,也算是 DLL 的嵌套调用。
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <Windows.h>
#include <string.h>
#include "SplitDll.h"
#include "ICTCLAS50.h"
//#pragma comment(lib, "ICTCLAS50.lib")

// Function-pointer types for the ICTCLAS50.dll entry points.
// Each one is resolved at run time with GetProcAddress under the export
// name noted beside it.

// "ICTCLAS_Init"
typedef bool (*mICTCLAS_Init)(const char *pszInitDir);

// "ICTCLAS_ParagraphProcess"
typedef int (*mICTCLAS_ParagraphProcess)(const char *sSentence,
                                         int nPaLen,
                                         char *pszResult,
                                         eCodeType Type,
                                         bool nPos);

// "ICTCLAS_ImportUserDict"
typedef unsigned int (*mICTCLAS_ImportUserDict)(const char *pszDictBuffer,
                                                const int nLength,
                                                eCodeType codeType);

// "ICTCLAS_ParagraphProcessA"
typedef LPICTCLAS_RESULT (*mICTCLAS_Para)(const char *sSentence,
                                          int nPaLen,
                                          int &rstCount,
                                          eCodeType mCode,
                                          bool bSet);

// "ICTCLAS_SetPOSmap"
typedef bool (*mICTCLAS_SetPOSmap)(int nPOSmap);

// "ICTCLAS_ResultFree"
typedef bool (*mICTCLAS_ResultFree)(LPICTCLAS_RESULT pRetVec);

// "ICTCLAS_Exit"
typedef bool (*mICTCLAS_Exit)();

/**
 * Segments a sentence with ICTCLAS and writes the segmented text to StrDes.
 *
 * ICTCLAS50.dll is loaded, initialised, used and unloaded inside this call,
 * so no library state is kept between calls.
 *
 * @param strSrc  NUL-terminated input sentence; handed to the library as
 *                CODE_TYPE_UTF8.
 * @param StrDes  Caller-owned output buffer that receives the result.
 *                NOTE(review): the required capacity is not visible here; the
 *                previous revision reserved 6x the input length for a scratch
 *                buffer it never used, which may hint at the expected size.
 *                Confirm against the ICTCLAS documentation.
 * @param bPos    Forwarded unchanged as the last argument of
 *                ICTCLAS_ParagraphProcess.
 * @return The value returned by ICTCLAS_ParagraphProcess, or -1 if an
 *         argument is NULL, the DLL or one of its entry points cannot be
 *         loaded, or ICTCLAS_Init fails.
 */
DICT_API int SplitPara( const char* strSrc, char* StrDes, bool bPos )
{
  if (strSrc == NULL || StrDes == NULL)
  {
    return -1;
  }

  HINSTANCE hInstLibrary = LoadLibrary(L"ICTCLAS50.dll");
  if (hInstLibrary == NULL)
  {
    // Nothing was loaded, so there is nothing to free.
    return -1;
  }

  mICTCLAS_Init mmICTCLAS_Init =
    (mICTCLAS_Init)GetProcAddress(hInstLibrary, "ICTCLAS_Init");
  mICTCLAS_ParagraphProcess mmICTCLAS_ParagraphProcess =
    (mICTCLAS_ParagraphProcess)GetProcAddress(hInstLibrary, "ICTCLAS_ParagraphProcess");
  mICTCLAS_Exit mmICTCLAS_Exit =
    (mICTCLAS_Exit)GetProcAddress(hInstLibrary, "ICTCLAS_Exit");

  // A missing export would otherwise be called through a null pointer.
  if (mmICTCLAS_Init == NULL || mmICTCLAS_ParagraphProcess == NULL || mmICTCLAS_Exit == NULL)
  {
    FreeLibrary(hInstLibrary);
    return -1;
  }

  if (!mmICTCLAS_Init(NULL))
  {
    printf("初始化错误");
    FreeLibrary(hInstLibrary);  // do not leak the module handle on failure
    return -1;
  }

  int nLen = (int)strlen(strSrc);
  int WordCount = mmICTCLAS_ParagraphProcess(strSrc, nLen, StrDes, CODE_TYPE_UTF8, bPos);

  mmICTCLAS_Exit();
  FreeLibrary(hInstLibrary);
  return WordCount;
}

/**
 * Imports user dictionary entries into ICTCLAS.
 *
 * ICTCLAS50.dll is loaded, initialised, used and unloaded inside this call.
 * The previous revision returned before reaching ICTCLAS_Exit/FreeLibrary,
 * so the module handle leaked on every call; the cleanup now always runs.
 *
 * NOTE(review): whether entries imported here survive ICTCLAS_Exit (i.e. are
 * persisted by the library) is not visible in this file. Confirm against the
 * ICTCLAS documentation.
 *
 * @param AddDict  NUL-terminated dictionary buffer; passed to
 *                 ICTCLAS_ImportUserDict together with its strlen() and
 *                 CODE_TYPE_UNKNOWN.
 * @return The value returned by ICTCLAS_ImportUserDict (converted to int),
 *         or -1 if AddDict is NULL, the DLL or one of its entry points cannot
 *         be loaded, or ICTCLAS_Init fails.
 */
DICT_API int UpdataUserDict( const char* AddDict )
{
  if (AddDict == NULL)
  {
    return -1;
  }

  HINSTANCE hInstLibrary = LoadLibrary(L"ICTCLAS50.dll");
  if (hInstLibrary == NULL)
  {
    // Nothing was loaded, so there is nothing to free.
    return -1;
  }

  mICTCLAS_Init mmICTCLAS_Init =
    (mICTCLAS_Init)GetProcAddress(hInstLibrary, "ICTCLAS_Init");
  mICTCLAS_ImportUserDict mmICTCLAS_ImportUserDict =
    (mICTCLAS_ImportUserDict)GetProcAddress(hInstLibrary, "ICTCLAS_ImportUserDict");
  mICTCLAS_Exit mmICTCLAS_Exit =
    (mICTCLAS_Exit)GetProcAddress(hInstLibrary, "ICTCLAS_Exit");

  // A missing export would otherwise be called through a null pointer.
  if (mmICTCLAS_Init == NULL || mmICTCLAS_ImportUserDict == NULL || mmICTCLAS_Exit == NULL)
  {
    FreeLibrary(hInstLibrary);
    return -1;
  }

  if (!mmICTCLAS_Init(NULL))
  {
    FreeLibrary(hInstLibrary);  // do not leak the module handle on failure
    return -1;
  }

  // Capture the result first so the cleanup below is actually reached.
  unsigned int imported = mmICTCLAS_ImportUserDict(AddDict, (int)strlen(AddDict), CODE_TYPE_UNKNOWN);

  mmICTCLAS_Exit();
  FreeLibrary(hInstLibrary);
  return (int)imported;
}

//DLL改为动态调用
DICT_API ResultInfo* ProcessSenstence( const char* strSrc )
{
  HINSTANCE hInstLibrary = LoadLibrary(L"ICTCLAS50.dll");
  if (hInstLibrary == NULL)
  {
    FreeLibrary(hInstLibrary);
    return NULL;
  }
  mICTCLAS_Init mmICTCLAS_Init;
  mICTCLAS_Para mmICTCLAS_Para;
  mICTCLAS_SetPOSmap mmICTCLAS_SetPOSmap;
  mICTCLAS_ResultFree mmICTCLAS_ResultFree;
  mICTCLAS_Exit mmICTCLAS_Exit;
  mmICTCLAS_Init = (mICTCLAS_Init)GetProcAddress(hInstLibrary, "ICTCLAS_Init");
  mmICTCLAS_Para = (mICTCLAS_Para)GetProcAddress(hInstLibrary, "ICTCLAS_ParagraphProcessA");
  mmICTCLAS_SetPOSmap = (mICTCLAS_SetPOSmap)GetProcAddress(hInstLibrary, "ICTCLAS_SetPOSmap");
  mmICTCLAS_ResultFree = (mICTCLAS_ResultFree)GetProcAddress(hInstLibrary, "ICTCLAS_ResultFree");
  mmICTCLAS_Exit = (mICTCLAS_Exit)GetProcAddress(hInstLibrary, "ICTCLAS_Exit");

  if (!mmICTCLAS_Init(NULL))
  {
    return NULL;
  }//初始化
  int nPaLen=strlen(strSrc);    
  int rstCount=0;
  mmICTCLAS_SetPOSmap(ICT_POS_MAP_SECOND);
  LPICTCLAS_RESULT stVec=mmICTCLAS_Para(strSrc, nPaLen,rstCount,CODE_TYPE_UNKNOWN,1);
  /************************************************************************/
  /* 0814modified,返回输出数组                                              */
  /************************************************************************/
  ResultInfo* mResult = (ResultInfo*)malloc(sizeof(ResultInfo));
  if (mResult == NULL)
    return NULL;  
  int NListSize_Pre = 0, NListSize_Non = 0,  VListSize = 0, VNListSize = 0;
  //中间结构体,其中名词分两种,NWordList_Pre专有名词,地名,人名 NWordList_Non一般名词
  pWord NWordList_Pre = (pWord)malloc(sizeof(Word));
  pWord NWordList_Non = (pWord)malloc(sizeof(Word));
  pWord VWordList = (pWord)malloc(sizeof(Word));
  pWord VNWordList = (pWord)malloc(sizeof(Word));
  if (NWordList_Pre == NULL && NWordList_Non == NULL && VWordList == NULL && VNWordList ==NULL)
    return NULL;
  //中间结构体,字符串预分配
  NWordList_Pre->sWord = (char*)calloc(nPaLen, sizeof(char));
  NWordList_Non->sWord = (char*)calloc(nPaLen, sizeof(char));
  VWordList->sWord = (char*)calloc(nPaLen, sizeof(char));
  VNWordList->sWord = (char*)calloc(nPaLen, sizeof(char));
  //判断最终struct中是否为空
  bool bN = false, bV = false, bVN = false;
  //分词结构体的大小,分词结果LPICTCLAS_RESULT结构详见头文件
  
  for (int i = 0; i < rstCount; i++)
  {    
    static LPICTCLAS_RESULT lastWordInfo = &stVec[0];
    if (stVec[i].iPOS == 24 || stVec[i].iPOS == 29 ||  stVec[i].iPOS == 27|| stVec[i].iPOS == 32|| stVec[i].iPOS == 21 )
    {

      if (stVec[i].iPOS != 21)//非n
      {
        //NListSize_Pre += (stVec[i].iLength + 1);
        //处理非nlist
        char* pAddWord = (char*)malloc(stVec[i].iLength+2);
        memcpy(pAddWord, &strSrc[stVec[i].iStartPos], stVec[i].iLength);      
        pAddWord[stVec[i].iLength] = ' ';
        pAddWord[stVec[i].iLength + 1] = '\0';    
        //printf("%s\n", pAddWord);
        //NWordList_Pre->WordSize = NListSize_Pre;
        if(strstr(NWordList_Pre->sWord, pAddWord) == NULL)
          strcat(NWordList_Pre->sWord, pAddWord); 
        NWordList_Pre->WordSize = strlen(NWordList_Pre->sWord);
        free(pAddWord);
        bN = true;
        //printf("%s\n", NWordList_Pre->sWord);
      }
      else//n
      {             
        //NListSize_Non += (stVec[i].iLength + 1);    
        //处理n
        char* pAddWord = (char*)malloc(stVec[i].iLength+2);
        memcpy(pAddWord, &strSrc[stVec[i].iStartPos], stVec[i].iLength);
        pAddWord[stVec[i].iLength] = ' ';
        pAddWord[stVec[i].iLength + 1] = '\0';     
        //printf("%s\n", pAddWord);
        //NWordList_Non->WordSize = NListSize_Non;
        if(strstr(NWordList_Non->sWord, pAddWord) == NULL)
          strcat(NWordList_Non->sWord, pAddWord); 
        NWordList_Non->WordSize = strlen(NWordList_Non->sWord);
        free(pAddWord);
        bN = true;
        // printf("%s\n", NWordList_Non->sWord);
        //处理V+N的情况
        if (lastWordInfo->iPOS == 68 || lastWordInfo->iPOS == 74 ||lastWordInfo->iPOS == 2)
        {
          //VNListSize +=(lastWordInfo->iLength + stVec[i].iLength + 1);
          int tmpLen = lastWordInfo->iLength + stVec[i].iLength;
          char* pAddWord = (char*)malloc(tmpLen + 2);
          memcpy(pAddWord, &strSrc[lastWordInfo->iStartPos], tmpLen);
          pAddWord[tmpLen] = ' ';
          pAddWord[tmpLen+1] = '\0';
          // printf("%s\n", pAddWord);
          //VNWordList->WordSize = VNListSize;
          if(strstr(VNWordList->sWord, pAddWord) == NULL)
            strcat(VNWordList->sWord, pAddWord); 
          VNWordList->WordSize = strlen(VNWordList->sWord);
          free(pAddWord);
          bVN = true;
          //printf("%s\n", VNWordList->sWord);
        }
      }
    }

    if (stVec[i].iPOS == 74 ||stVec[i].iPOS == 72 ||stVec[i].iPOS == 68)
    {

      //VListSize += (stVec[i].iLength + 1);
      char* pAddWord = (char*)malloc(stVec[i].iLength+2);
      memcpy(pAddWord, &strSrc[stVec[i].iStartPos], stVec[i].iLength);
      pAddWord[stVec[i].iLength] = ' ';
      pAddWord[stVec[i].iLength + 1] = '\0'; 
      //printf("%s\n", pAddWord);
      //VWordList->WordSize = VListSize;
      if(strstr(VWordList->sWord, pAddWord) == NULL)
        strcat(VWordList->sWord, pAddWord); 
      VWordList->WordSize = strlen(VWordList->sWord);
      free(pAddWord);
      bV = true;
      //printf("%s\n", VWordList->sWord);

    }

    lastWordInfo = &stVec[i];    
    if (i == rstCount - 1)//最终结果
    {

      pWord NList = (pWord)malloc(sizeof(Word));
      NList->sWord = (char*)calloc(2 * nPaLen,sizeof(char));      
      strcat(strcat(NList->sWord, NWordList_Pre->sWord), NWordList_Non->sWord);  
      strcat(strcat(NList->sWord, NWordList_Pre->sWord), NWordList_Non->sWord); 
      free(NWordList_Pre->sWord);
      free(NWordList_Non->sWord);
      free(NWordList_Pre);
      free(NWordList_Non);
      //NList->WordSize = NWordList_Pre->WordSize +NWordList_Non->WordSize;
      NList->WordSize = strlen(NList->sWord);
      mResult->NrWordList = NList;
      mResult->VWordList = VWordList;
      mResult->VNWordList = VNWordList;
      if(!bN)
      {
        free(NList->sWord);
        free(NList);
        mResult->NrWordList = NULL;
      }

      if(!bV)
      {
        free(VWordList->sWord);
        free(VWordList);
        mResult->VWordList = NULL;
      }

      if(!bVN)
      {
        free(VNWordList->sWord);
        free(VNWordList);
        mResult->VNWordList = NULL;
      }       
    }

  }

  mmICTCLAS_ResultFree(stVec);
  mmICTCLAS_Exit();
  FreeLibrary(hInstLibrary);
  return mResult;
  
}

/**
 * Releases a ResultInfo together with its three word lists and their strings.
 *
 * @param pStruResult  Result to release; NULL is accepted and ignored.
 *                     List members that are NULL are skipped.
 */
DICT_API void FreeProcessResult( ResultInfo* pStruResult )
{
  if (pStruResult == NULL)
  {
    return;
  }

  // Same release order as before: nouns, verbs, verb+noun pairs.
  pWord owned[] = {
    pStruResult->NrWordList,
    pStruResult->VWordList,
    pStruResult->VNWordList
  };

  for (size_t idx = 0; idx < sizeof(owned) / sizeof(owned[0]); ++idx)
  {
    pWord entry = owned[idx];
    if (entry != NULL)
    {
      free(entry->sWord);  // free(NULL) is a no-op, so no extra guard needed
    }
    free(entry);
  }

  free(pStruResult);
}

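// The DLL exports the three functions above plus FreeProcessResult, which releases the result of ProcessSenstence; callers should load the DLL dynamically as well.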

你可能感兴趣的:(api,struct,null,dll,任务)