int
_tmain(
int
argc, _TCHAR
*
argv[])
{
int
end;
//
set<string>labels;
vector
<
string
>
labelsTrain
=
GetLabels(
"
ReteursTrain
"
);
vector
<
string
>
labelsTest
=
GetLabels(
"
ReteursTest
"
);
vector
<
string
>
finalLabels;
for
(vector
<
string
>
::iterator it
=
labelsTrain.begin();it
!=
labelsTrain.end();it
++
)
{
trim(
*
it,
"
"
);
}
for
(vector
<
string
>
::iterator it
=
labelsTest.begin();it
!=
labelsTest.end();it
++
)
{
trim(
*
it,
"
"
);
}
for
(vector
<
string
>
::iterator it
=
labelsTrain.begin();it
!=
labelsTrain.end();it
++
)
{
if
(count_if(labelsTest.begin(),labelsTest.end(),GT_clss(
*
it)))
{
finalLabels.push_back(
*
it);
}
}
char
*
selectbySpecificId
=
new
char
[
1000
];
memset(selectbySpecificId,
0
,
1000
);
sprintf_s(selectbySpecificId,
1000
,
"
select CArticleName,CAbstract,Categorization from ReteursTest
"
);
CoInitialize(NULL);
_ConnectionPtr pConn(__uuidof(Connection));
_RecordsetPtr pRst(__uuidof(Recordset));
_ConnectionPtr pConn2(__uuidof(Connection));
pConn
->
ConnectionString
=
"
Provider=SQLOLEDB.1;Password=finally;Persist Security Info=True; User ID=sa;Initial Catalog=NewsInfo
"
;
pConn2
->
ConnectionString
=
"
Provider=SQLOLEDB.1;Password=finally;Persist Security Info=True; User ID=sa;Initial Catalog=FinallyCorpus
"
;
pConn
->
Open(
""
,
""
,
""
,adConnectUnspecified);
pConn2
->
Open(
""
,
""
,
""
,adConnectUnspecified);
pRst
=
pConn
->
Execute(selectbySpecificId,NULL,adCmdText);
while
(
!
pRst
->
rsEOF)
{
string
label
=
(_bstr_t)pRst
->
GetCollect(
"
Categorization
"
);
trim(label,
"
"
);
if
(count_if(finalLabels.begin(),finalLabels.end(),GT_clss(label)))
{
string
ArticleTitle
=
(_bstr_t)pRst
->
GetCollect(
"
CArticleName
"
);
string
ArticleText
=
(_bstr_t)pRst
->
GetCollect(
"
CAbstract
"
);
ArticleTitle
=
ProcessforMSSQL(ArticleTitle);
ArticleText
=
ProcessforMSSQL(ArticleText);
char
*
sqlInsert
=
new
char
[
1000000
];
_variant_t RecordsAffected;
memset(sqlInsert,
0
,
1000000
);
sprintf_s(sqlInsert,
1000000
,
"
insert into ReteursTestingCorpus(CArticleName,CAbstract,Categorization) values('%s','%s','%s')
"
,ArticleTitle.c_str(),ArticleText.c_str(),label.c_str());
pConn2
->
Execute(sqlInsert,
&
RecordsAffected,
-
1
);
delete []sqlInsert;
}
pRst
->
MoveNext();
}
pRst
->
Close();
pConn
->
Close();
pRst.Release();
pConn.Release();
pConn2
->
Close();
pConn2.Release();
CoUninitialize();
delete []selectbySpecificId;
cout
<<
"
两标签集交集为
"
<<
endl;
cout
<<
finalLabels.size()
<<
endl;
//
DictionaryToDataBase();
//
FindFile(L"E:\\新闻语料\\reuters21578");
//
pRst=pConn->Execute(,NULL,adCmdText);
cout
<<
"
finish
"
<<
endl;
cin
>>
end;
}