using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Xml;
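/* Title: algorithm that calls the Sina word-segmentation service
* Author: 贾道远
* Date: 2012/4/13
* Usage: set `sentence` to the article that should be segmented, then run the program.
* Result structure: for every token, its split position and its part-of-speech id.
* Definition: the "split position" is the position of the end of the token. For example, if the first token is "明天" its split position is 2; if the second token is "是", its split position is 3.
* */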
namespace CSPostData
{
class Program
{
/// <summary>
/// One word produced by the segmentation step: the text of the word, the
/// split position reported for it and its part-of-speech id.
/// </summary>
class token
{
    // Split position: the position in the article at which this token ends.
    private readonly int _position;

    // Part-of-speech id; stored at construction, not exposed by any accessor.
    private readonly int _id;

    // The text of the segmented word.
    private readonly string _lexicon;

    /// <summary>Creates a token.</summary>
    /// <param name="p">Split position (end position of the token).</param>
    /// <param name="i">Part-of-speech id.</param>
    /// <param name="le">Text of the token.</param>
    public token(int p, int i, string le)
    {
        _position = p;
        _id = i;
        _lexicon = le;
    }

    /// <summary>Returns the split position given to the constructor.</summary>
    public int getPos()
    {
        return _position;
    }

    /// <summary>Returns the token text given to the constructor.</summary>
    public string getLexicon()
    {
        return _lexicon;
    }
}
/// <summary>
/// Batch entry point. Loads the stock list from "stocks.txt" and the bullish /
/// bearish word lists from "up.txt" / "down.txt", then for every article
/// data\4398.txt .. data\6108.txt:
/// sends the text to the remote segmentation service, rebuilds the tokens from
/// the returned split positions, prints them, scores the article against the
/// two word lists, picks the first stock mentioned (by name or by id), and
/// appends the stock name and the predicted trend to the article file.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Encoding encoding = Encoding.GetEncoding("UTF-8");

    Console.WriteLine("股票数据库构造中……");
    List<string> stockNames = new List<string>();
    List<string> stockIDs = new List<string>();
    LoadStocks("stocks.txt", encoding, stockNames, stockIDs);

    // Lookup structures built once so each token is matched in O(1).
    HashSet<string> knownNames = new HashSet<string>(stockNames);
    Dictionary<string, string> nameById = new Dictionary<string, string>();
    for (int i = 0; i < stockIDs.Count; i++)
    {
        // Lines without "(id)" yield an empty id; on duplicates the first entry wins.
        if (stockIDs[i].Length > 0 && !nameById.ContainsKey(stockIDs[i]))
        {
            nameById.Add(stockIDs[i], stockNames[i]);
        }
    }
    Console.WriteLine("成功!");

    // The word lists do not change between articles, so they are read once
    // instead of once per article.
    Dictionary<string, int> upWords = LoadWordCounts("up.txt", encoding);     // bullish words
    Dictionary<string, int> downWords = LoadWordCounts("down.txt", encoding); // bearish words

    // One generator for the whole run (random fallback stock and tie-break).
    Random random = new Random();
    const string url = "http://1.caunion.sinaapp.com/a.php";

    // Segment every article, predict rise/fall and extract the stock name.
    for (int fid = 4398; fid <= 6108; fid++)
    {
        string articlePath = "data\\" + fid + ".txt";
        string sentence = File.ReadAllText(articlePath, encoding); // article to segment
        Console.Write("请您耐心等待,文章分词中.......\n");

        string content = RequestSegmentation(url, sentence, encoding);
        List<token> tokens = BuildTokens(content, sentence);

        Console.Write("分词结果为:\n\n\n");
        foreach (token current in tokens)
        {
            Console.Write(current.getLexicon() + "\n");
        }
        Console.Write("\n\n\n");

        // Score the article: every token found in a word list adds as many
        // points as the word has lines in that list (duplicates count twice,
        // exactly as when the lists were scanned line by line).
        int up = 0, down = 0;
        foreach (token current in tokens)
        {
            string lexicon = current.getLexicon();
            if (lexicon.Length == 0)
            {
                continue; // an empty token must not match blank lines of the lists
            }
            int weight;
            if (upWords.TryGetValue(lexicon, out weight))
            {
                up += weight;
            }
            if (downWords.TryGetValue(lexicon, out weight))
            {
                down += weight;
            }
        }

        // First stock mentioned in the article; when none is mentioned a random
        // stock from the list is used, bounded by the real list size.
        string s_name = FindStockName(tokens, knownNames, nameById);
        if (s_name == null)
        {
            s_name = stockNames.Count > 0 ? stockNames[random.Next(stockNames.Count)] : "";
        }

        // Decide by score; a tie is broken by a coin flip.
        bool rising;
        if (up != down)
        {
            rising = up > down;
        }
        else
        {
            rising = random.Next() % 2 == 0;
        }
        string st_trend = rising ? "涨" : "跌";
        Console.Write("本个股预测结果为 " + st_trend + "\n");
        Console.WriteLine(s_name);

        // Append the result to the article file itself.
        // NOTE(review): running the program twice therefore feeds the previously
        // appended lines back into the segmentation - confirm this is intended.
        using (StreamWriter writer = new StreamWriter(articlePath, true, encoding))
        {
            writer.WriteLine(s_name);
            writer.WriteLine(st_trend);
        }
    }

    Console.WriteLine("操作完成!");
    Console.WriteLine("退出?");
    Console.ReadKey();
}

/// <summary>
/// Reads the stock list. In every line the characters between '(' and ')' form
/// the stock id and all other characters form the stock name; one name and one
/// id (possibly empty) are appended per line, so both lists stay index-aligned.
/// </summary>
static void LoadStocks(string path, Encoding encoding, List<string> names, List<string> ids)
{
    foreach (string line in File.ReadAllLines(path, encoding))
    {
        StringBuilder name = new StringBuilder();
        StringBuilder id = new StringBuilder();
        bool insideParentheses = false;
        foreach (char ch in line)
        {
            if (ch == '(')
            {
                insideParentheses = true;
            }
            else if (ch == ')')
            {
                insideParentheses = false;
            }
            else if (insideParentheses)
            {
                id.Append(ch);
            }
            else
            {
                name.Append(ch);
            }
        }
        names.Add(name.ToString());
        ids.Add(id.ToString());
    }
}

/// <summary>
/// Reads a word list (one word per line) and returns how many lines each
/// distinct word occupies. An empty file yields an empty dictionary.
/// </summary>
static Dictionary<string, int> LoadWordCounts(string path, Encoding encoding)
{
    Dictionary<string, int> counts = new Dictionary<string, int>();
    foreach (string word in File.ReadAllLines(path, encoding))
    {
        int seen;
        counts.TryGetValue(word, out seen); // seen stays 0 for a new word
        counts[word] = seen + 1;
    }
    return counts;
}

/// <summary>
/// Percent-encodes a value for an application/x-www-form-urlencoded body.
/// The text is escaped in chunks because Uri.EscapeDataString rejects very
/// long inputs on older .NET Framework versions.
/// </summary>
static string UrlEncodeFormValue(string value)
{
    const int chunkSize = 8000;
    StringBuilder encoded = new StringBuilder(value.Length * 3);
    int offset = 0;
    while (offset < value.Length)
    {
        int length = Math.Min(chunkSize, value.Length - offset);
        // Never cut a surrogate pair in half at a chunk boundary.
        if (offset + length < value.Length && char.IsHighSurrogate(value[offset + length - 1]))
        {
            length--;
        }
        encoded.Append(Uri.EscapeDataString(value.Substring(offset, length)));
        offset += length;
    }
    return encoded.ToString();
}

/// <summary>
/// POSTs the article as form field "sentence" to the segmentation service and
/// returns the response body as text. The request is only sent once
/// GetResponse() is called. Network and protocol errors propagate as WebException.
/// </summary>
static string RequestSegmentation(string url, string sentence, Encoding encoding)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";

    // The body carries only the encoded form field; the URL does not belong in
    // it, and unescaped '&', '+', '%' or '=' in the article would corrupt it.
    byte[] data = encoding.GetBytes("sentence=" + UrlEncodeFormValue(sentence));
    request.ContentLength = data.Length;
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(data, 0, data.Length);
    }

    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Rebuilds the tokens from the service response. Every "w" element holds the
/// split position (end of the token) as its text and the part-of-speech id in
/// attribute "t"; a token spans from the previous split position (0 for the
/// first one) up to its own. Positions are used as character indices into
/// <paramref name="sentence"/> and are clamped to its bounds, so an
/// out-of-range position shortens the token instead of throwing.
/// </summary>
static List<token> BuildTokens(string responseXml, string sentence)
{
    XmlDocument xmlDoc = new XmlDocument();
    xmlDoc.LoadXml(responseXml.Trim());
    XmlNodeList nodeList = xmlDoc.GetElementsByTagName("w");

    List<token> tokens = new List<token>();
    int previousEnd = 0;
    foreach (XmlNode node in nodeList)
    {
        int end = Convert.ToInt32(node.InnerText);
        int partOfSpeech = Convert.ToInt32(node.Attributes["t"].Value);

        int from = Math.Min(Math.Max(previousEnd, 0), sentence.Length);
        int to = Math.Min(Math.Max(end, from), sentence.Length);
        tokens.Add(new token(end, partOfSpeech, sentence.Substring(from, to - from)));

        previousEnd = end;
    }
    return tokens;
}

/// <summary>
/// Returns the name of the first stock mentioned by a token: the token itself
/// when it equals a stock name, or the name belonging to the id when it equals
/// a stock id. Returns null when no token mentions a stock.
/// </summary>
static string FindStockName(List<token> tokens, HashSet<string> knownNames, Dictionary<string, string> nameById)
{
    foreach (token current in tokens)
    {
        string lexicon = current.getLexicon();
        if (lexicon.Length == 0)
        {
            continue; // empty tokens must not match entries with an empty name
        }
        if (knownNames.Contains(lexicon))
        {
            return lexicon;
        }
        string mappedName;
        if (nameById.TryGetValue(lexicon, out mappedName))
        {
            return mappedName;
        }
    }
    return null;
}
}
}