Daily Report 2012/11/06 陈伯雄(step 7)

  今天完成了中文分词功能，并修改了用 SQL 建立倒排索引的框架。原来打算只建立一个倒排索引，但考虑到数据库中存在 4 个对应不同文件类型的表（DOC、PDF、VEDIO、QAPAIR），现在改为分别针对每个表建立倒排索引，并根据用户的搜索需求查询对应的表（效率问题有待商榷）。

 // Builds one inverted index table per document type (doc / pdf / vedio / qapair).
 // For every row of a source table, the title and keyword text are segmented into
 // words; each word becomes a key in the matching index table, and its ID column
 // holds a comma-separated list of the IDs of the rows that contain the word.
 List<Result> resultList = new List<Result>();
 string connectionString = GetConnectionString();                // SQL Server connection string

 // NOTE(review): the source table names are taken from the report text
 // (DOC, PDF, VEDIO, QAPAIR) and are assumed to expose title, keyword and ID
 // columns -- confirm against the real schema.
 // Both arrays are fixed in code and never come from user input, which is why
 // concatenating them into SQL text below is safe (identifiers cannot be
 // passed as SQL parameters).
 string[] sourceTables = { "DOC", "PDF", "VEDIO", "QAPAIR" };
 string[] indexTables = { "index_doc", "index_pdf", "index_vedio", "index_qapair" };

 using (SqlConnection connection = new SqlConnection(connectionString))
 {
     connection.Open();

     for (int i = 0; i < indexTables.Length; i++)
     {
         string sourceTable = sourceTables[i];
         string indexTable = indexTables[i];

         // Create the index table only if it is missing, so the job can be re-run.
         // [key] is bracketed because KEY is a reserved T-SQL keyword.
         // nvarchar keeps Chinese terms intact regardless of collation, and
         // nvarchar(max) lets the comma-separated ID list grow past 50 characters.
         string createSql =
             "IF OBJECT_ID(N'" + indexTable + "', N'U') IS NULL " +
             "CREATE TABLE " + indexTable +
             "([key] nvarchar(50) PRIMARY KEY, ID nvarchar(max))";
         using (SqlCommand create = new SqlCommand(createSql, connection))
         {
             create.ExecuteNonQuery();
         }

         // Read the whole source table first. A connection cannot run other
         // commands while a SqlDataReader is still open on it, so the rows are
         // buffered and the reader is closed before any index writes happen.
         List<string[]> rows = new List<string[]>();
         using (SqlCommand select = new SqlCommand(
             "SELECT title, keyword, ID FROM " + sourceTable, connection))
         using (SqlDataReader reader = select.ExecuteReader())
         {
             while (reader.Read())
             {
                 rows.Add(new string[]
                 {
                     reader["title"].ToString(),
                     reader["keyword"].ToString(),
                     reader["ID"].ToString()
                 });
             }
         }

         // Parameterized statements: the word and ID values are sent as
         // parameters instead of being written into the SQL text.
         using (SqlCommand lookup = new SqlCommand(
             "SELECT ID FROM " + indexTable + " WHERE [key] = @word", connection))
         using (SqlCommand insert = new SqlCommand(
             "INSERT INTO " + indexTable + " ([key], ID) VALUES (@word, @ids)", connection))
         using (SqlCommand update = new SqlCommand(
             "UPDATE " + indexTable + " SET ID = @ids WHERE [key] = @word", connection))
         {
             lookup.Parameters.Add("@word", System.Data.SqlDbType.NVarChar, 50);
             insert.Parameters.Add("@word", System.Data.SqlDbType.NVarChar, 50);
             insert.Parameters.Add("@ids", System.Data.SqlDbType.NVarChar, -1);    // -1 = nvarchar(max)
             update.Parameters.Add("@word", System.Data.SqlDbType.NVarChar, 50);
             update.Parameters.Add("@ids", System.Data.SqlDbType.NVarChar, -1);

             foreach (string[] row in rows)
             {
                 string title = row[0];
                 string keyword = row[1];
                 string id = row[2];

                 // Word segmentation (Chinese tokenizer; see Li Zhong's part for details).
                 List<string> words = getWords(title, keyword);

                 // A word repeated within one row must add that row's ID only once.
                 HashSet<string> seen = new HashSet<string>();

                 foreach (string word in words)
                 {
                     if (string.IsNullOrEmpty(word) || !seen.Add(word))
                     {
                         continue;
                     }

                     lookup.Parameters["@word"].Value = word;
                     object existing = lookup.ExecuteScalar();

                     if (existing == null)
                     {
                         // ExecuteScalar returns null when no row matched:
                         // the word is new, so add it to the inverted index.
                         insert.Parameters["@word"].Value = word;
                         insert.Parameters["@ids"].Value = id;
                         insert.ExecuteNonQuery();
                     }
                     else
                     {
                         // The word is already indexed: append this row's ID.
                         // DBNull means the row exists but its ID column is NULL.
                         string current = existing == System.DBNull.Value ? "" : existing.ToString();
                         string newValue = current.Length == 0 ? id : current + "," + id;
                         update.Parameters["@word"].Value = word;
                         update.Parameters["@ids"].Value = newValue;
                         update.ExecuteNonQuery();
                     }
                 }
             }
         }
     }

     // Matching (not implemented yet).
     string userWord = "用户搜索的关键词";
     //match(userWord);
 }   // leaving the using block closes and disposes the connection

你可能感兴趣的:(port)