/*
***********************************构建倒排表: key=word,val= a list of pairs which consists of articleid ,and count, count=tf************************************************************
*/
int
ConstructMap(map
<
string
,vector
<
pair
<
int
,
int
>>>&
mymap,
int
beginindex,
int
endindex)
{
//
vector<string> mySplit(string s);
set
<
string
>
MakeStopSet();
vector
<
string
>
goodWordsinPieceArticle(
string
rawtext,
set
<
string
>
stopwords);
CoInitialize(NULL);
_ConnectionPtr pConn(__uuidof(Connection));
_RecordsetPtr pRst(__uuidof(Recordset));
char
*
select
=
new
char
[
5000
];
memset(select,
0
,
5000
);
char
*
firstpart
=
"
select CKeyWord,ArticleId,CAbstract from Article where ArticleId between
"
;
char
*
lastpart
=
"
order by ArticleId
"
;
char
middlepart1[
100
];
char
middlepart2[
100
];
sprintf_s(middlepart1,
sizeof
(middlepart1),
"
%d
"
,beginindex);
sprintf_s(middlepart2,
sizeof
(middlepart2),
"
%d
"
,endindex);
strcat(select,firstpart);
strcat(select,middlepart1);
strcat(select,
"
and
"
);
strcat(select,middlepart2);
strcat(select,lastpart);
pConn
->
ConnectionString
=
"
Provider=SQLOLEDB.1;Password=xxxxxx;Persist Security Info=True; User ID=sa;Initial Catalog=ArticleCollection
"
;
pConn
->
Open(
""
,
""
,
""
,adConnectUnspecified);
pRst
=
pConn
->
Execute(select,NULL,adCmdText);
set
<
string
>
stopwords
=
MakeStopSet();
while
(
!
pRst
->
rsEOF)
{ vector
<
string
>
wordcollection;
//
string keywordstr=(_bstr_t)pRst->GetCollect("CKeyWord");
string
rawtext
=
(_bstr_t)pRst
->
GetCollect(
"
CAbstract
"
);
if
(rawtext
!=
""
)
{
wordcollection
=
goodWordsinPieceArticle(rawtext,stopwords);
string
tempid
=
(_bstr_t)pRst
->
GetCollect(
"
ArticleId
"
);
int
articleid
=
atoi(tempid.c_str());
for
(vector
<
string
>
::iterator strit
=
wordcollection.begin();strit
!=
wordcollection.end();strit
++
)
{
vector
<
pair
<
int
,
int
>>
::iterator it;
if
(mymap[
*
strit].empty())
{
pair
<
int
,
int
>
mytemppair
=
make_pair(articleid,
1
);
mymap[
*
strit].push_back(mytemppair);
}
else
{
for
(it
=
mymap[
*
strit].begin();it
!=
mymap[
*
strit].end();it
++
)
{
if
(it
->
first
==
articleid)
{
it
->
second
=++
(it
->
second);
break
;
}
}
if
(it
==
mymap[
*
strit].end())
{
pair
<
int
,
int
>
mytemppair
=
make_pair(articleid,
1
);
mymap[
*
strit].push_back(mytemppair);
}
}
}
}
pRst
->
MoveNext();
wordcollection.clear();
}
pRst
->
Close();
pConn
->
Close();
pRst.Release();
pConn.Release();
CoUninitialize();
delete[] select;
return
0
;
}