轮子精神:用于分析Lingoes结果的HTML解析对象
先说前面那个“暴力硬编码”,我一直比较反对这种方式,虽然有时真的很好用,但这种解决问题的方式不太符合我编程的习惯--走一步看三步,就是说编写一个对象或者一个函数的时候,要考虑到其未来可能的更改需求,并为可能的需求预留出必要的空间。而硬编码恰恰相反,是以解决当前问题为目标的方法。当然这种方法也没有错误,只是对像我这样总是担心未来、悲观的人来说,能不用就不用,因为用了怕睡不着。。。
从十几年前学习HTML4时,就想着写一个HTML解析器,由于认为自己不适合写这种与算法紧密相关的代码,也就一直没有敢动手,怕自己陷进去出不来。这次在分析Lingoes结果时,找不到像tinyxml那样好用的、轻量的解析库,而硬编码实在有太多的弊端,折腾了几天,最终决定--开始造轮子。。。
基于上面的阐述,就有了下面这个HTML解析对象--TinyHtmlParser。TinyHtmlParser仅仅是分解HTML文档,有些类似tinyxml,但要比其能力差很多,且编码完全没有参考tinyxml,效率也是大问题。此对象对我来说,就是要验证一下是否有能力分解HTML文档,因此功能只是集中在字符串的处理上。TinyHtmlParser由CDocumentObject、CElementObject等简单几个类构成,使用标准且简单的HTML字串测试还算达到预期了,但分析Lingoes的结果时就有些混乱了,对于此问题,不知道是我理解HTML有误,还是浏览器们兼容错误的能力超强。因此后期针对Lingoes的结果做了一些特殊的处理。(真是“源码在手,天下我有”啊。。。)
TinyHtmlParser现在还没有达到可以使用的程度,对我来说,查找某个Tag的value是主要需求,而TinyHtmlParser还没有完成此功能,但这只是时间的问题了。今天着急贴上,是因为要过节了,中间一周不碰电脑,怕自己都忘记现在做什么了,因此记录一下,下次都写好,再慢慢加上为什么会写成这样。
#ifndef __TINYHTMLPARSER_H__
#define __TINYHTMLPARSER_H__

#include <cctype>
#include <deque>
#include <iostream>
#include <memory>
#include <queue>
#include <stack>
#include <string>
#include <vector>
8
9 namespace TinyHtmlParser
10 {
11
/// Classification of a parsed element.
enum ElementType
{
    ET_UNKNOWN = -1, ///< not classified yet
    ET_TAG = 0,      ///< 0: just a tag
    ET_NODE,         ///< 1: no value
    ET_ELEMENT       ///< 2: have value
};
13
/// One attribute of an element, stored as a node of a singly linked list.
class CAttributeObject
{
public:
    /// Stores the name/value pair; the node starts out unlinked (next == NULL).
    CAttributeObject(const std::string& name, const std::string& val)
        : attr(name)
        , value(val)
        , next(NULL)
    {
    }

    virtual ~CAttributeObject()
    {
    }

public:
    std::string attr;       // attribute name
    std::string value;      // attribute value text
    CAttributeObject* next; // following attribute in the list, or NULL
};
28
29class CElementObject
30{
31public:
32 CElementObject();
33 virtual ~CElementObject();
34
35 virtual int Analyse();
36
37 void Show(std::ostream& os) const;
38protected:
39 int AnalyseAttribute(const std::string& attr);
40 int MakeAttribute(const std::string& attr);
41 int MakeAttribute(const std::string& attr, const std::string& value);
42 void FreeAnalyseAttribute();
43 int AnalyseValue();
44public:
45 ElementType type;
46 size_t level;
47 CElementObject* parent;
48 CElementObject* child;
49 CElementObject* sibling;
50
51 CAttributeObject* attrib;
52public:
53 std::string tag;
54 std::string value;
55};
56
/// Byte range of one lexical item (tag or text run) inside the preprocessed HTML string.
class CParserData
{
public:
    enum DataType { DT_UNKNOWN = -1, DT_TAG = 0, DT_VALUE, DT_END, DT_DONE, DT_TAG_VALUE };

public:
    /// All offsets start at 0 so that a freshly constructed object can be
    /// compared safely; CDocumentObject::PreParser reads `start` before its first write.
    CParserData()
        : type(DT_UNKNOWN)
        , start(0)
        , end(0)
        , vstart(0)
        , vend(0)
    {
    }
    virtual ~CParserData() {}

public:
    DataType type;  // kind of item
    size_t start;   // offset of the first character of the item
    size_t end;     // offset of the terminating character ('>' for tags, one past the text for values)
    size_t vstart;  // DT_TAG_VALUE only: start of the text that followed the opening tag
    size_t vend;    // DT_TAG_VALUE only: end of that text
};
75
76class CDocumentObject
77{
78protected:
79 static const char TAG_LT = '<';
80 static const char TAG_GT = '>';
81 static const char TAG_SLASH = '/';
82 static const char TAG_BSLASH = '\\';
83 static const char TAG_AND = '&';
84
85 typedef std::vector<CParserData> TDataVector;
86
87 typedef std::stack<CParserData> TDataStack;
88 struct TNodeData
89 {
90 size_t level;
91 CParserData tag;
92 CParserData value;
93// CParserData end;
94 };
95 typedef std::deque<TNodeData> TNodeQueue;
96public:
97 CDocumentObject();
98 virtual ~CDocumentObject();
99
100 int Load(const std::string& str);
101
102 const CElementObject* Root() const;
103
104 const CElementObject* FindTag() const;
105
106 void Show(std::ostream& os) const;
107protected:
108 int PreProcess(const std::string& str, std::string& html);
109 int PreParser(const std::string& html, TNodeQueue& vct);
110 int Parser(const std::string& html, TNodeQueue& que);
111private:
112 int PreParserLT(const std::string& html, std::string::size_type& pos, CParserData& data);
113 int PushValueData(const CParserData& data, TDataStack& datastack) const;
114 int PushTagData(const std::string& html, const CParserData& data, TDataStack& datatstack, TNodeQueue& nodeque) const;
115
116 int CheckSpecialTag(const std::string& html, const CParserData& data) const;
117 int CheckTag(const std::string& html, const CParserData& tag, const CParserData& end) const;
118 CElementObject* MakeElement(const std::string& html, const TNodeData& node, CElementObject* parent, CElementObject* sibling) const;
119
120 void CDocumentObject::ShowElement(std::ostream& os, const CElementObject* e) const;
121
122 void FreeElement(CElementObject* root);
123private:
124 CElementObject* _root;
125};
126
127}
128
129 #endif
130
2 #include " TinyHtmlParser.h "
3
4 namespace TinyHtmlParser
5 {
6
7CElementObject::CElementObject()
8: type(ET_UNKNOWN)
9, level(0)
10, parent(NULL)
11, child(NULL)
12, sibling(NULL)
13, attrib(NULL)
14{
15}
16
17CElementObject::~CElementObject()
18{
19 FreeAnalyseAttribute();
20}
21
22int CElementObject::Analyse()
23{
24 std::string str = tag;
25
26 std::string::size_type pos = str.find(" ");
27 if(pos != std::string::npos)
28 {
29 tag = str.substr(0, pos + 1);
30
31 str = str.substr(pos + 1);
32 if(AnalyseAttribute(str) != 0)
33 {
34 return -1;
35 }
36 }
37 if(type == ET_ELEMENT)
38 {
39 if(AnalyseValue() != 0)
40 return -1;
41 }
42 return 0;
43}
44
45int CElementObject::AnalyseAttribute(const std::string& attr)
46{
47 if(attr.size() == 0)
48 return 0;
49
50 std::string a, v;
51 std::string::size_type pos = attr.find("="), start = 0;
52 while(pos != std::string::npos)
53 {
54 a = attr.substr(start, pos - start);
55 if(pos == attr.size() - 1)
56 return -1;
57 start = pos + 1;
58 if(attr[pos + 1] == '\"')
59 {
60 pos = attr.find("\"", start + 1);
61 if(pos == std::string::npos)
62 return -1;
63 v = attr.substr(start, pos - start + 1);
64 start = pos + 2;
65 }
66 else
67 {
68 pos = attr.find(" ", start);
69 if(pos == std::string::npos)
70 pos = attr.size() - 1;
71 v = attr.substr(start, pos - start);
72 start = pos + 1;
73 }
74 if(MakeAttribute(a, v) != 0)
75 return -1;
76
77 if(start >= attr.size())
78 break;
79
80 pos = attr.find("=", start);
81 }
82 return 0;
83}
84
85int CElementObject::MakeAttribute(const std::string &attr)
86{
87 std::string::size_type pos = attr.find("=");
88 if(pos == std::string::npos)
89 return -1;
90
91 return MakeAttribute(attr.substr(0, pos), attr.substr(pos));
92}
93
94int CElementObject::MakeAttribute(const std::string &attr, const std::string& value)
95{
96 std::auto_ptr<CAttributeObject> obj(new CAttributeObject(attr, value));//attr.substr(0, pos), attr.substr(pos)));
97
98 if(attrib != NULL)
99 {
100 CAttributeObject* tmp = attrib;
101 while(tmp->next != NULL)
102 tmp = tmp->next;
103 tmp->next = obj.release();
104 }
105 else
106 {
107 attrib = obj.release();
108 }
109 return 0;
110}
111
112
113void CElementObject::FreeAnalyseAttribute()
114{
115 CAttributeObject* tmp = attrib;
116 while(attrib != NULL)
117 {
118 tmp = attrib->next;
119 delete attrib;
120 attrib = tmp;
121 }
122
123}
124
125int CElementObject::AnalyseValue()
126{
127 std::string::size_type pos = this->value.find(" ");
128 while(pos != std::string::npos)
129 {
130 this->value.replace(pos, 6, " ");
131 pos = this->value.find(" ", pos + 1);
132 }
133
134 return 0;
135}
136
137
138void CElementObject::Show(std::ostream& os) const
139{
140 os << "[" << this->level << "]" << "Tag:" << this->tag;
141 if(this->type == ET_ELEMENT)
142 os << " -- value:" << this->value;
143 os << std::endl;
144
145 const CAttributeObject* attr = this->attrib;
146 while(attr != NULL)
147 {
148 os << " attr:" << attr->attr << " -- value:" << attr->value << std::endl;
149 attr = attr->next;
150 }
151 os << std::endl;
152}
153//
154
155CDocumentObject::CDocumentObject()
156: _root(NULL)
157{
158}
159
160CDocumentObject::~CDocumentObject()
161{
162 if(_root != NULL)
163 FreeElement(_root);
164}
165
166int CDocumentObject::Load(const std::string &str)
167{
168 std::string html;
169 if(PreProcess(str, html) != 0)
170 return -1;
171 TNodeQueue que;
172 if(PreParser(html, que) != 0)
173 return -1;
174 if(Parser(html, que) != 0)
175 return -1;
176 return 0;
177}
178
179int CDocumentObject::PreProcess(const std::string& str, std::string& html)
180{
181 bool tag = false;
182 for(std::string::const_iterator it = str.begin(); it != str.end(); ++ it)
183 {
184 if(*it == TAG_LT)
185 {
186 if(tag == true)
187 return -1;
188 tag = true;
189 }
190 else if(*it == TAG_GT)
191 {
192 if(tag == false)
193 return -1;
194 tag = false;
195 }
196 else
197 {
198 if(tag == false)
199 {
200 if(isspace((unsigned char)*it) != 0)
201 continue;
202 }
203 }
204 html += *it;
205 }
206
207 return 0;
208}
209
210int CDocumentObject::PreParser(const std::string& html, CDocumentObject::TNodeQueue& que)
211{
212 std::string::size_type pos = 0;
213
214 if(html.size() == 0)
215 return -1;
216 if(html[pos] != TAG_LT)
217 return -1;
218
219 TDataStack datastack;
220
221 CParserData data;
222
223 while(pos < html.size())
224 {
225 if(html[pos] == TAG_LT)
226 {
227 if(pos > data.start)
228 {
229 data.type = CParserData::DT_VALUE;
230 data.end = pos;
231
232// std::cout << "VALUE - " << html.substr(data.start, data.end - data.start) << std::endl;
233 if(PushValueData(data, datastack) != 0)
234 return -1;
235 }
236
237 if(PreParserLT(html, pos, data) != 0)
238 return -1;
239// std::cout << "TAG - " << html.substr(data.start, data.end - data.start) << std::endl;
240 if(PushTagData(html, data, datastack, que) != 0)
241 return -1;
242
243 ++ pos;
244 data.start = pos;
245 }
246 //else if(html[pos] == TAG_GT || html[pos] == TAG_SLASH)
247 //{
248 // return -1;
249 //}
250 else
251 {
252 ++ pos;
253 }
254// std::cout << (char)html[pos] << std::endl;
255 }
256
257 return 0;
258}
259
260int CDocumentObject::Parser(const std::string& html, CDocumentObject::TNodeQueue& que)
261{
262 CElementObject *pe = NULL, *pp = NULL, *ps = NULL;
263 size_t level = 0;
264 while(que.size()> 0)
265 {
266 const TNodeData &node = que.front();
267 if(level < que.front().level)
268 {
269 pp = pe;
270 ps = NULL;
271 }
272 else if(level == que.front().level)
273 {
274 ps = pe;
275 }
276 else//>
277 {
278 ps = pe;
279 pp = pe->parent;
280 int t = level - que.front().level;
281 while(t > 0)
282 {
283 ps = ps->parent;
284 pp = pp->parent;
285 -- t;
286 }
287 }
288 level = que.front().level;
289
290 pe = MakeElement(html, que.front(), pp, ps);
291
292 if(pe == NULL)
293 return -1;
294
295 que.pop_front();
296 }
297
298 if(pp != NULL)
299 {
300 while(pp->parent != NULL)
301 pp = pp->parent;
302 _root = pp;
303 }
304 else
305 _root = pe;
306
307 return 0;
308}
309
310int CDocumentObject::PreParserLT(const std::string& html, std::string::size_type& pos, CParserData& data)
311{
312 if(pos == html.size() - 1)
313 return -1;
314
315 data.start = pos;
316
317 ++ pos;
318
319 if(html[pos] != TAG_SLASH)
320 {
321 data.type = CParserData::DT_TAG;
322 }
323 else
324 {
325 data.type = CParserData::DT_END;
326 ++ pos;
327 }
328
329 while(pos < html.size())
330 {
331 if(html[pos] == TAG_GT)
332 {
333 if(html[pos - 1] == TAG_SLASH)
334 {
335 data.type = CParserData::DT_DONE;
336 }
337
338 data.end = pos;
339
340 return 0;
341 }
342 else if(html[pos] == TAG_LT)
343 {
344 return -1;
345 }
346
347 ++ pos;
348 }
349
350 return -1;
351}
352
353int CDocumentObject::PushValueData(const TinyHtmlParser::CParserData &data, CDocumentObject::TDataStack &datastack) const
354{
355 if(datastack.size() == 0)
356 return -1;
357 datastack.push(data);
358 return 0;
359}
360
361int CDocumentObject::PushTagData(const std::string& html, const CParserData& data, CDocumentObject::TDataStack& datastack, CDocumentObject::TNodeQueue& nodeque) const
362{
363 if(data.type == CParserData::DT_TAG)
364 {
365 if(CheckSpecialTag(html, data) == 0)
366 {
367 TNodeData node;
368 node.tag = data;
369
370 node.level = datastack.size();
371 nodeque.push_front(node);
372 return 0;
373 }
374
375 if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
376 {
377 CParserData data = datastack.top();
378 datastack.pop();
379 if(datastack.top().type != CParserData::DT_TAG)
380 return -1;
381 datastack.top().type = CParserData::DT_TAG_VALUE;
382 datastack.top().vstart = data.start;
383 datastack.top().vend = data.end;
384 }
385
386 datastack.push(data);
387 }
388 else if(data.type == CParserData::DT_END)
389 {
390 if(datastack.size() == 0)
391 return -1;
392
393 TNodeData node;
394 if(datastack.top().type == CParserData::DT_TAG || datastack.top().type == CParserData::DT_TAG_VALUE)
395 {
396 node.tag = datastack.top();
397 datastack.pop();
398 }
399 else if(datastack.top().type == CParserData::DT_VALUE)
400 {
401 node.value = datastack.top();
402
403// std::cout << "value - " << html.substr(node.value.start, node.value.end - node.value.start) << std::endl;
404
405 datastack.pop();
406
407 if(datastack.size() == 0)
408 return -1;
409
410 if(datastack.top().type == CParserData::DT_TAG)
411 {
412 node.tag = datastack.top();
413 }
414 else if(datastack.top().type == CParserData::DT_TAG_VALUE)
415 {
416 node.tag = datastack.top();
417 }
418 else
419 {
420 return -1;
421 }
422
423 //node.tag = datastack.top();
424 //else if(datastack.top().type == CParserData::DT_TAG_VALUE)
425 //{
426 // node.tag = datastack.top();
427 //}
428 datastack.pop();
429 }
430 else
431 {
432// std::cout << "type : " << datastack.top().type << std::endl;
433 return -1;
434 }
435
436 if(CheckTag(html, node.tag, data) != 0)
437 return -1;
438
439 node.level = datastack.size();
440 nodeque.push_front(node);
441 }
442 else if(data.type == CParserData::DT_DONE)
443 {
444 if(datastack.size() > 0 && datastack.top().type == CParserData::DT_VALUE)
445 {
446 CParserData data = datastack.top();
447 datastack.pop();
448 if(datastack.top().type != CParserData::DT_TAG)
449 return -1;
450 datastack.top().type = CParserData::DT_TAG_VALUE;
451 datastack.top().vstart = data.start;
452 datastack.top().vend = data.end;
453 }
454
455// datastack.push(data);
456
457 TNodeData node;
458 node.tag = data;
459
460 node.level = datastack.size();
461 nodeque.push_front(node);
462 }
463 else
464 {
465 return -1;
466 }
467 return 0;
468}
469
470int CDocumentObject::CheckSpecialTag(const std::string& html, const CParserData& data) const
471{
472 std::string tag = html.substr(data.start + 1, data.end - data.start - 1);
473 std::string::size_type pos = tag.find(" ");
474 if(pos != std::string::npos)
475 tag = tag.substr(0, pos);
476
477 if(tag == "IMG")
478 return 0;
479
480 return -1;
481}
482
483int CDocumentObject::CheckTag(const std::string& html, const CParserData& tag, const CParserData& end) const
484{
485 std::string str = html.substr(tag.start + 1, tag.end - tag.start - 1);
486 std::string::size_type pos = str.find(" ");
487 if(pos != std::string::npos)
488 str = str.substr(0, pos);
489
490 if(str != html.substr(end.start + 2, end.end - end.start - 2))
491 {
492// std::cout << "tag : " << str << " -- end : " << html.substr(end.start + 2, end.end - end.start - 2) << std::endl;
493 return -1;
494 }
495 return 0;
496}
497
498CElementObject* CDocumentObject::MakeElement(const std::string& html, const CDocumentObject::TNodeData &node, CElementObject *parent, CElementObject *sibling) const
499{
500 std::auto_ptr<CElementObject> ele(new CElementObject);
501
502 ele->level = node.level;
503
504 if(node.tag.type == CParserData::DT_TAG)
505 {
506 ele->type = ET_NODE;
507 ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 1);
508 }
509 else if(node.tag.type == CParserData::DT_DONE)
510 {
511 ele->type = ET_TAG;
512 ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 2);
513 }
514 else if(node.tag.type == CParserData::DT_TAG_VALUE)
515 {
516 ele->tag = ET_NODE;
517 ele->tag = html.substr(node.tag.start + 1, node.tag.end - node.tag.start - 1);
518 }
519 else
520 return NULL;
521
522 if(node.value.type == CParserData::DT_VALUE)
523 {
524 ele->type = ET_ELEMENT;
525 if(node.tag.type == CParserData::DT_TAG)
526 ele->value = html.substr(node.value.start, node.value.end - node.value.start);
527 else
528 ele->value = html.substr(node.tag.vstart, node.tag.vend - node.tag.vstart) + "%" + html.substr(node.value.start, node.value.end - node.value.start);
529 }
530
531 if(ele->Analyse() != 0)
532 {
533 return NULL;
534 }
535
536 if(parent != NULL)
537 parent->child = ele.get();
538 ele->parent = parent;
539 ele->sibling = sibling;
540
541 //std::cout << "element: tag - " << ele->tag << std::endl;
542
543 return ele.release();
544}
545
546void CDocumentObject::Show(std::ostream &os) const
547{
548 if(_root != NULL)
549 ShowElement(os, _root);
550}
551
552void CDocumentObject::ShowElement(std::ostream& os, const CElementObject* e) const
553{
554 const CElementObject* pe = e, *ps = e->sibling;
555
556 pe->Show(os);
557
558 pe = pe->child;
559 if(pe != NULL)
560 {
561 ShowElement(os, pe);
562 }
563 if(ps != NULL)
564 {
565 ShowElement(os, ps);
566 }
567}
568
569void CDocumentObject::FreeElement(CElementObject* root)
570{
571 CElementObject* pe = root->child, *ps = root->sibling;
572
573// std::cout << "free:" << root->tag << std::endl;
574
575 if(root != NULL)
576 {
577 free(root);
578 root = NULL;
579 }
580
581 if(pe != NULL)
582 {
583 FreeElement(pe);
584 }
585 if(ps != NULL)
586 {
587 FreeElement(ps);
588 }
589}
590
591}
下面是测试代码,其中有直接copy自lingoes的结果串。
2 #include < string >
3
4 #include " TinyHtmlParser.h "
5
6 using namespace TinyHtmlParser;
7
8 int Test()
9 {
10 //std::string str = "<hh></hh><HTML>\n<DOC DFDK = \"DD\"/><BODY> 1 2 3 </BODY><abc><def></def></abc></HTML><TEST>567</TEST>";
11
12 std::string str = \
13"<DIV id=main dir=ltr style=\"FONT-SIZE: 9pt; FONT-FAMILY: Tahoma, Arial; HEIGHT: 100%\">"
14"<DIV id=main_info style=\"DISPLAY: none\"> </DIV>"
15"<DIV id=main_wnd>"
16"<DIV id=lingoes_dictarea></DIV>"
17"<DIV id=dict_E1C27E806399D047822B6650194A3D32 style=\"PADDING-RIGHT: 10px; PADDING-LEFT: 10px; FONT-SIZE: 10.5pt; PADDING-BOTTOM: 0px; WIDTH: 100%; LINE-HEIGHT: 1.2em; PADDING-TOP: 10px; FONT-FAMILY: 'Tahoma'\" groupid=\"2\" dictid=\"E1C27E806399D047822B6650194A3D32\">"
18"<TABLE onselectstart=\"return true\" id=dict_head_E1C27E806399D047822B6650194A3D32 cellSpacing=0 cellPadding=0 border=0>"
19"<TBODY>"
20"<TR>"
21"<TD style=\"BORDER-RIGHT: #92b0dd 1px solid; BORDER-TOP: #92b0dd 1px solid; FONT-SIZE: 9pt; BACKGROUND: #cfddf0; BORDER-LEFT: #92b0dd 1px solid; COLOR: #000080; LINE-HEIGHT: 1em; BORDER-BOTTOM: #92b0dd 1px solid; FONT-FAMILY: ''\" noWrap>"
22"<DIV onmouseup=\"this.className='btn2_mouse_up'\" class=btn2_mouse_out onmousedown=\"this.className='btn2_mouse_down'\" id=dict_title_E1C27E806399D047822B6650194A3D32 onmouseover=\"this.className='btn2_mouse_over'\" title=\"Dictionary Menu\" style=\"MARGIN: 0px 3px 1px 0px\" onclick=\"window.navigate('app://dictmenu/E1C27E806399D047822B6650194A3D32-2')\" onmouseout=\"this.className='btn2_mouse_out'\"><IMG height=16 hspace=3 src=\"file:/**////D:/Profiles/grp436/Local%20Settings/Application%20Data/Lingoes/Translator/temp/dict/E1C27E806399D047822B6650194A3D32/icon.png\" width=16 align=absMiddle border=0><SPAN style=\"PADDING-RIGHT: 4px; PADDING-LEFT: 2px; PADDING-BOTTOM: 0px; PADDING-TOP: 0px\">Vicon English-Chinese(S) Dictionary</SPAN><IMG height=4 src=\"file:///C:/Program%20Files/Lingoes/Translator2/dict/image/menu.png\" width=7 align=absMiddle border=0> </DIV></TD>"
23"<TD style=\"BORDER-BOTTOM: #92b0dd 1px solid\"><IMG style=\"DISPLAY: none\" height=8 hspace=5 src=\"file:/**////C:/Program%20Files/Lingoes/Translator2/dict/image/sst.png\" width=11 align=absMiddle border=0> </TD>"
24"<TD style=\"BORDER-BOTTOM: #92b0dd 1px solid\" align=right width=\"100%\">"
25"<DIV style=\"OVERFLOW: hidden; WIDTH: 10px; CURSOR: hand; MARGIN-RIGHT: 2px; HEIGHT: 10px\"><IMG onmouseup=\"this.style.marginLeft = '-10px'\" onmousedown=\"this.style.marginLeft = '-10px'\" id=dict_show_E1C27E806399D047822B6650194A3D32 onmouseover=\"this.style.marginLeft = '-10px'\" title=\"Minimize Result\" style=\"MARGIN-TOP: 0px; MARGIN-LEFT: 0px\" onclick=\"window.navigate('app://hidemeaning/E1C27E806399D047822B6650194A3D32-2') ; this.style.marginTop = (parseInt(this.style.marginTop) == 0) ? '-10px' : '0px' ;\" onmouseout=\"this.style.marginLeft = '0px'\" height=20 src=\"file:/**////C:/Program%20Files/Lingoes/Translator2/dict/image/expand.png\" width=20 border=0></DIV></TD></TR></TBODY></TABLE>"
26"<DIV id=dict_body_E1C27E806399D047822B6650194A3D32>"
27"<DIV id=dict_gls_E1C27E806399D047822B6650194A3D32>"
28"<DIV style=\"MARGIN: 5px 0px\">"
29"<DIV style=\"WIDTH: 100%\">"
30"<DIV style=\"FLOAT: left; LINE-HEIGHT: normal\"><IMG height=11 src=\"file:/**////C:/Program%20Files/Lingoes/Translator2/dict/image/entry_p.png\" width=10 align=absMiddle border=0> </DIV>"
31"<DIV style=\"OVERFLOW-X: hidden; WIDTH: 100%\">"
32"<DIV style=\"MARGIN: 0px 0px 5px; COLOR: #808080; LINE-HEIGHT: normal\"><SPAN style=\"FONT-SIZE: 10.5pt; COLOR: #000000; LINE-HEIGHT: normal\"><B>what</B></SPAN> <SPAN style=\"FONT-SIZE: 10.5pt; LINE-HEIGHT: normal; FONT-FAMILY: 'Lingoes Unicode'\">[<FONT color=#009900>hwɑt ,hw?t /w?t</FONT>]</SPAN></DIV>"
33"<DIV style=\"MARGIN: 0px 0px 5px\">"
34"<DIV style=\"MARGIN: 4px 0px\"><FONT color=#c00000>adj.</FONT> 什么</DIV></DIV>"
35"<DIV style=\"MARGIN: 0px 0px 5px\">"
36"<DIV style=\"MARGIN: 4px 0px\"><FONT color=#c00000>adv.</FONT> 到什么程度, 在哪一方面</DIV></DIV>"
37"<DIV style=\"MARGIN: 0px 0px 5px\">"
38"<DIV style=\"MARGIN: 4px 0px\"><FONT color=#c00000>interj.</FONT> 怎么</DIV></DIV></DIV></DIV></DIV></DIV></DIV></DIV></DIV></DIV>";
39
40 //std::string str = \
41 // "<1 hello><1-1>[<1-1-1/>]</1-1><1-2/><1-3/></1><2/><3><3-1><3-1-1/></3-1></3>";
42
43 CDocumentObject doc;
44 doc.Load(str);
45
46 doc.Show(std::cout);
47
48 return 0;
49}
50
51
52 int main()
53 {
54 Test();
55 return 0;
56}