#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <Windows.h>
#include <string.h>
#include "SplitDll.h"
#include "ICTCLAS50.h"
//#pragma comment(lib, "ICTCLAS50.lib")
// Function-pointer types for the ICTCLAS50.dll entry points. The functions in
// this file resolve the matching exports at run time with GetProcAddress.
// Pointer type used for the "ICTCLAS_Init" export.
typedef bool (*mICTCLAS_Init)(const char* pszInitDir);
// Pointer type used for the "ICTCLAS_ParagraphProcess" export.
typedef int (*mICTCLAS_ParagraphProcess)(const char* sSentence,int nPaLen,char* pszResult, eCodeType Type,bool nPos);
// Pointer type used for the "ICTCLAS_ImportUserDict" export.
typedef unsigned int (*mICTCLAS_ImportUserDict)(const char* pszDictBuffer, const int nLength, eCodeType codeType);
// Pointer type used for the "ICTCLAS_ParagraphProcessA" export; the result
// count is written through the rstCount reference.
typedef LPICTCLAS_RESULT (*mICTCLAS_Para)(const char *sSentence,int nPaLen,int &rstCount, eCodeType mCode , bool bSet);
// Pointer type used for the "ICTCLAS_SetPOSmap" export.
typedef bool (*mICTCLAS_SetPOSmap)(int nPOSmap);
// Pointer type used for the "ICTCLAS_ResultFree" export.
typedef bool (*mICTCLAS_ResultFree)(LPICTCLAS_RESULT pRetVec);
// Pointer type used for the "ICTCLAS_Exit" export.
typedef bool (*mICTCLAS_Exit)();
// Runs ICTCLAS paragraph processing on strSrc and writes the output to StrDes.
//
// ICTCLAS50.dll is loaded, initialised, used and unloaded on every call.
//
// Parameters:
//   strSrc - NUL-terminated input text; passed to ICTCLAS as CODE_TYPE_UTF8.
//   StrDes - caller-owned output buffer filled by ICTCLAS_ParagraphProcess.
//            NOTE(review): this function cannot check its capacity; the
//            scratch buffer the previous version allocated (and never used)
//            was 6 * strlen(strSrc) bytes, which suggests the expected size
//            -- confirm against the ICTCLAS documentation.
//   bPos   - forwarded unchanged as the last argument of
//            ICTCLAS_ParagraphProcess.
//
// Returns the value returned by ICTCLAS_ParagraphProcess, or -1 when an
// argument is NULL, the DLL or one of its exports cannot be loaded, or
// ICTCLAS_Init fails.
DICT_API int SplitPara( const char* strSrc, char* StrDes, bool bPos )
{
    if (strSrc == NULL || StrDes == NULL)
    {
        return -1;
    }

    HINSTANCE hInstLibrary = LoadLibrary(L"ICTCLAS50.dll");
    if (hInstLibrary == NULL)
    {
        // Nothing was loaded, so there is nothing to free; just report failure.
        return -1;
    }

    mICTCLAS_Init mmICTCLAS_Init =
        (mICTCLAS_Init)GetProcAddress(hInstLibrary, "ICTCLAS_Init");
    mICTCLAS_ParagraphProcess mmICTCLAS_ParagraphProcess =
        (mICTCLAS_ParagraphProcess)GetProcAddress(hInstLibrary, "ICTCLAS_ParagraphProcess");
    mICTCLAS_Exit mmICTCLAS_Exit =
        (mICTCLAS_Exit)GetProcAddress(hInstLibrary, "ICTCLAS_Exit");

    // A missing export would otherwise be called through a null pointer.
    if (mmICTCLAS_Init == NULL || mmICTCLAS_ParagraphProcess == NULL || mmICTCLAS_Exit == NULL)
    {
        FreeLibrary(hInstLibrary);
        return -1;
    }

    if (!mmICTCLAS_Init(NULL))
    {
        printf("初始化错误");
        FreeLibrary(hInstLibrary);
        return -1;
    }

    int nLen = (int)strlen(strSrc);
    int WordCount = mmICTCLAS_ParagraphProcess(strSrc, nLen, StrDes, CODE_TYPE_UTF8, bPos);

    mmICTCLAS_Exit();
    FreeLibrary(hInstLibrary); // release the DLL reference taken above
    return WordCount;
}
// Imports the user dictionary held in AddDict into ICTCLAS.
//
// ICTCLAS50.dll is loaded, initialised, used and unloaded on every call.
//
// Parameters:
//   AddDict - NUL-terminated dictionary buffer; its strlen() is passed as the
//             buffer length and the encoding is given as CODE_TYPE_UNKNOWN.
//
// Returns the value returned by ICTCLAS_ImportUserDict (converted to int), or
// -1 when AddDict is NULL, the DLL or one of its exports cannot be loaded, or
// ICTCLAS_Init fails.
//
// NOTE(review): the previous version returned before calling ICTCLAS_Exit and
// FreeLibrary, so the library stayed loaded and initialised after every call.
// Cleanup now runs as the code was evidently meant to. Whether an imported
// dictionary survives ICTCLAS_Exit is decided by ICTCLAS, not by this file --
// confirm against the ICTCLAS documentation.
DICT_API int UpdataUserDict( const char* AddDict )
{
    if (AddDict == NULL)
    {
        return -1;
    }

    HINSTANCE hInstLibrary = LoadLibrary(L"ICTCLAS50.dll");
    if (hInstLibrary == NULL)
    {
        // Nothing was loaded, so there is nothing to free; just report failure.
        return -1;
    }

    mICTCLAS_Init mmICTCLAS_Init =
        (mICTCLAS_Init)GetProcAddress(hInstLibrary, "ICTCLAS_Init");
    mICTCLAS_ImportUserDict mmICTCLAS_ImportUserDict =
        (mICTCLAS_ImportUserDict)GetProcAddress(hInstLibrary, "ICTCLAS_ImportUserDict");
    mICTCLAS_Exit mmICTCLAS_Exit =
        (mICTCLAS_Exit)GetProcAddress(hInstLibrary, "ICTCLAS_Exit");

    // A missing export would otherwise be called through a null pointer.
    if (mmICTCLAS_Init == NULL || mmICTCLAS_ImportUserDict == NULL || mmICTCLAS_Exit == NULL)
    {
        FreeLibrary(hInstLibrary);
        return -1;
    }

    if (!mmICTCLAS_Init(NULL))
    {
        FreeLibrary(hInstLibrary);
        return -1;
    }

    // Keep the result so that cleanup can run before returning it.
    int nImported = (int)mmICTCLAS_ImportUserDict(AddDict, (int)strlen(AddDict), CODE_TYPE_UNKNOWN);

    mmICTCLAS_Exit();
    FreeLibrary(hInstLibrary);
    return nImported;
}
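// ICTCLAS50.dll is now called dynamically (LoadLibrary/GetProcAddress) rather than linked via the import library.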
DICT_API ResultInfo* ProcessSenstence( const char* strSrc )
{
HINSTANCE hInstLibrary = LoadLibrary(L"ICTCLAS50.dll");
if (hInstLibrary == NULL)
{
FreeLibrary(hInstLibrary);
return NULL;
}
mICTCLAS_Init mmICTCLAS_Init;
mICTCLAS_Para mmICTCLAS_Para;
mICTCLAS_SetPOSmap mmICTCLAS_SetPOSmap;
mICTCLAS_ResultFree mmICTCLAS_ResultFree;
mICTCLAS_Exit mmICTCLAS_Exit;
mmICTCLAS_Init = (mICTCLAS_Init)GetProcAddress(hInstLibrary, "ICTCLAS_Init");
mmICTCLAS_Para = (mICTCLAS_Para)GetProcAddress(hInstLibrary, "ICTCLAS_ParagraphProcessA");
mmICTCLAS_SetPOSmap = (mICTCLAS_SetPOSmap)GetProcAddress(hInstLibrary, "ICTCLAS_SetPOSmap");
mmICTCLAS_ResultFree = (mICTCLAS_ResultFree)GetProcAddress(hInstLibrary, "ICTCLAS_ResultFree");
mmICTCLAS_Exit = (mICTCLAS_Exit)GetProcAddress(hInstLibrary, "ICTCLAS_Exit");
if (!mmICTCLAS_Init(NULL))
{
return NULL;
}//初始化
int nPaLen=strlen(strSrc);
int rstCount=0;
mmICTCLAS_SetPOSmap(ICT_POS_MAP_SECOND);
LPICTCLAS_RESULT stVec=mmICTCLAS_Para(strSrc, nPaLen,rstCount,CODE_TYPE_UNKNOWN,1);
/************************************************************************/
/* 0814modified,返回输出数组 */
/************************************************************************/
ResultInfo* mResult = (ResultInfo*)malloc(sizeof(ResultInfo));
if (mResult == NULL)
return NULL;
int NListSize_Pre = 0, NListSize_Non = 0, VListSize = 0, VNListSize = 0;
//中间结构体,其中名词分两种,NWordList_Pre专有名词,地名,人名 NWordList_Non一般名词
pWord NWordList_Pre = (pWord)malloc(sizeof(Word));
pWord NWordList_Non = (pWord)malloc(sizeof(Word));
pWord VWordList = (pWord)malloc(sizeof(Word));
pWord VNWordList = (pWord)malloc(sizeof(Word));
if (NWordList_Pre == NULL && NWordList_Non == NULL && VWordList == NULL && VNWordList ==NULL)
return NULL;
//中间结构体,字符串预分配
NWordList_Pre->sWord = (char*)calloc(nPaLen, sizeof(char));
NWordList_Non->sWord = (char*)calloc(nPaLen, sizeof(char));
VWordList->sWord = (char*)calloc(nPaLen, sizeof(char));
VNWordList->sWord = (char*)calloc(nPaLen, sizeof(char));
//判断最终struct中是否为空
bool bN = false, bV = false, bVN = false;
//分词结构体的大小,分词结果LPICTCLAS_RESULT结构详见头文件
for (int i = 0; i < rstCount; i++)
{
static LPICTCLAS_RESULT lastWordInfo = &stVec[0];
if (stVec[i].iPOS == 24 || stVec[i].iPOS == 29 || stVec[i].iPOS == 27|| stVec[i].iPOS == 32|| stVec[i].iPOS == 21 )
{
if (stVec[i].iPOS != 21)//非n
{
//NListSize_Pre += (stVec[i].iLength + 1);
//处理非nlist
char* pAddWord = (char*)malloc(stVec[i].iLength+2);
memcpy(pAddWord, &strSrc[stVec[i].iStartPos], stVec[i].iLength);
pAddWord[stVec[i].iLength] = ' ';
pAddWord[stVec[i].iLength + 1] = '\0';
//printf("%s\n", pAddWord);
//NWordList_Pre->WordSize = NListSize_Pre;
if(strstr(NWordList_Pre->sWord, pAddWord) == NULL)
strcat(NWordList_Pre->sWord, pAddWord);
NWordList_Pre->WordSize = strlen(NWordList_Pre->sWord);
free(pAddWord);
bN = true;
//printf("%s\n", NWordList_Pre->sWord);
}
else//n
{
//NListSize_Non += (stVec[i].iLength + 1);
//处理n
char* pAddWord = (char*)malloc(stVec[i].iLength+2);
memcpy(pAddWord, &strSrc[stVec[i].iStartPos], stVec[i].iLength);
pAddWord[stVec[i].iLength] = ' ';
pAddWord[stVec[i].iLength + 1] = '\0';
//printf("%s\n", pAddWord);
//NWordList_Non->WordSize = NListSize_Non;
if(strstr(NWordList_Non->sWord, pAddWord) == NULL)
strcat(NWordList_Non->sWord, pAddWord);
NWordList_Non->WordSize = strlen(NWordList_Non->sWord);
free(pAddWord);
bN = true;
// printf("%s\n", NWordList_Non->sWord);
//处理V+N的情况
if (lastWordInfo->iPOS == 68 || lastWordInfo->iPOS == 74 ||lastWordInfo->iPOS == 2)
{
//VNListSize +=(lastWordInfo->iLength + stVec[i].iLength + 1);
int tmpLen = lastWordInfo->iLength + stVec[i].iLength;
char* pAddWord = (char*)malloc(tmpLen + 2);
memcpy(pAddWord, &strSrc[lastWordInfo->iStartPos], tmpLen);
pAddWord[tmpLen] = ' ';
pAddWord[tmpLen+1] = '\0';
// printf("%s\n", pAddWord);
//VNWordList->WordSize = VNListSize;
if(strstr(VNWordList->sWord, pAddWord) == NULL)
strcat(VNWordList->sWord, pAddWord);
VNWordList->WordSize = strlen(VNWordList->sWord);
free(pAddWord);
bVN = true;
//printf("%s\n", VNWordList->sWord);
}
}
}
if (stVec[i].iPOS == 74 ||stVec[i].iPOS == 72 ||stVec[i].iPOS == 68)
{
//VListSize += (stVec[i].iLength + 1);
char* pAddWord = (char*)malloc(stVec[i].iLength+2);
memcpy(pAddWord, &strSrc[stVec[i].iStartPos], stVec[i].iLength);
pAddWord[stVec[i].iLength] = ' ';
pAddWord[stVec[i].iLength + 1] = '\0';
//printf("%s\n", pAddWord);
//VWordList->WordSize = VListSize;
if(strstr(VWordList->sWord, pAddWord) == NULL)
strcat(VWordList->sWord, pAddWord);
VWordList->WordSize = strlen(VWordList->sWord);
free(pAddWord);
bV = true;
//printf("%s\n", VWordList->sWord);
}
lastWordInfo = &stVec[i];
if (i == rstCount - 1)//最终结果
{
pWord NList = (pWord)malloc(sizeof(Word));
NList->sWord = (char*)calloc(2 * nPaLen,sizeof(char));
strcat(strcat(NList->sWord, NWordList_Pre->sWord), NWordList_Non->sWord);
strcat(strcat(NList->sWord, NWordList_Pre->sWord), NWordList_Non->sWord);
free(NWordList_Pre->sWord);
free(NWordList_Non->sWord);
free(NWordList_Pre);
free(NWordList_Non);
//NList->WordSize = NWordList_Pre->WordSize +NWordList_Non->WordSize;
NList->WordSize = strlen(NList->sWord);
mResult->NrWordList = NList;
mResult->VWordList = VWordList;
mResult->VNWordList = VNWordList;
if(!bN)
{
free(NList->sWord);
free(NList);
mResult->NrWordList = NULL;
}
if(!bV)
{
free(VWordList->sWord);
free(VWordList);
mResult->VWordList = NULL;
}
if(!bVN)
{
free(VNWordList->sWord);
free(VNWordList);
mResult->VNWordList = NULL;
}
}
}
mmICTCLAS_ResultFree(stVec);
mmICTCLAS_Exit();
FreeLibrary(hInstLibrary);
return mResult;
}
// Releases a ResultInfo such as the one returned by ProcessSenstence: for each
// of the three word lists, the string buffer and then the list itself are
// freed, followed by the ResultInfo. A NULL pStruResult and NULL lists are
// accepted.
DICT_API void FreeProcessResult( ResultInfo* pStruResult )
{
    if (pStruResult == NULL)
    {
        return;
    }

    // free(NULL) is a no-op, so only the list pointers need a guard before
    // they are dereferenced.
    if (pStruResult->NrWordList != NULL)
    {
        free(pStruResult->NrWordList->sWord);
        free(pStruResult->NrWordList);
    }

    if (pStruResult->VWordList != NULL)
    {
        free(pStruResult->VWordList->sWord);
        free(pStruResult->VWordList);
    }

    if (pStruResult->VNWordList != NULL)
    {
        free(pStruResult->VNWordList->sWord);
        free(pStruResult->VNWordList);
    }

    free(pStruResult);
}
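// The functions exported from this DLL are SplitPara, UpdataUserDict and ProcessSenstence, plus FreeProcessResult for releasing results; callers should load them dynamically.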