move some files into base/applications
[reactos.git] / reactos / base / applications / ibrowser / utility / xmlstorage.cpp
1
2 //
3 // XML storage classes
4 //
5 // xmlstorage.cpp
6 //
7 // Copyright (c) 2004, 2005 Martin Fuchs <martin-fuchs@gmx.net>
8 //
9
10
11 /*
12
13 All rights reserved.
14
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright
19 notice, this list of conditions and the following disclaimer.
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in
22 the documentation and/or other materials provided with the
23 distribution.
24
25 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 POSSIBILITY OF SUCH DAMAGE.
36
37 */
38
39 //#include "xmlstorage.h"
40 #include <precomp.h>
41
42
43 // work around GCC's wide string constant bug
44 #ifdef __GNUC__
45 const LPCXSSTR XMLStorage::XS_TRUE = XS_TEXT("true");
46 const LPCXSSTR XMLStorage::XS_FALSE = XS_TEXT("false");
47 const LPCXSSTR XMLStorage::XS_NUMBERFMT = XS_TEXT("%d");
48 #endif
49
50
51 namespace XMLStorage {
52
53
/// Strip one pair of enclosing delimiter characters from a NUL terminated string.
///
/// @param s  NUL terminated input string
/// @param b  opening delimiter expected as first character
/// @param e  closing delimiter expected as last character
/// @return   the text between the delimiters if both are present,
///           otherwise an unmodified copy of s
static std::string unescape(const char* s, char b='"', char e='"')
{
    const char* end = s + strlen(s);

    // Opening and closing delimiter have to be two distinct characters of the
    // input. Without the length check a lone delimiter (e.g. the string "\"")
    // would be taken as both at once, leaving end < s and a negative length.
    if (end-s >= 2 && *s == b && end[-1] == e)
        ++s, --end;

    return std::string(s, end-s);
}
70
/// Strip one pair of enclosing delimiter characters from a character range.
///
/// @param s  start of the input characters (need not be NUL terminated at s+l)
/// @param l  number of characters to look at; values <= 0 yield an empty string
/// @param b  opening delimiter expected as first character
/// @param e  closing delimiter expected as last character
/// @return   the text between the delimiters if both are present,
///           otherwise an unmodified copy of the l characters at s
static std::string unescape(const char* s, int l, char b='"', char e='"')
{
    // Nothing to copy; this also keeps a negative length away from std::string.
    if (l <= 0)
        return std::string();

    const char* end = s + l;

    // Opening and closing delimiter have to be two distinct characters of the
    // range, so a single delimiter character is never stripped.
    if (l >= 2 && *s == b && end[-1] == e)
        ++s, --end;

    return std::string(s, end-s);
}
87
88
89 /// move XPath like to position in XML tree
90 bool XMLPos::go(const char* path)
91 {
92 XMLNode* node = _cur;
93
94 // Is this an absolute path?
95 if (*path == '/') {
96 node = _root;
97 ++path;
98 }
99
100 node = node->find_relative(path);
101
102 if (node) {
103 go_to(node);
104 return true;
105 } else
106 return false;
107 }
108
109 /// move XPath like to position in XML tree
110 bool const_XMLPos::go(const char* path)
111 {
112 const XMLNode* node = _cur;
113
114 // Is this an absolute path?
115 if (*path == '/') {
116 node = _root;
117 ++path;
118 }
119
120 node = node->find_relative(path);
121
122 if (node) {
123 go_to(node);
124 return true;
125 } else
126 return false;
127 }
128
129
130 const XMLNode* XMLNode::find_relative(const char* path) const
131 {
132 const XMLNode* node = this;
133
134 // parse relative path
135 while(*path) {
136 const char* slash = strchr(path, '/');
137 if (slash == path)
138 return NULL;
139
140 int l = slash? slash-path: strlen(path);
141 std::string comp(path, l);
142 path += l;
143
144 // look for [n] and [@attr_name="attr_value"] expressions in path components
145 const char* bracket = strchr(comp.c_str(), '[');
146 l = bracket? bracket-comp.c_str(): comp.length();
147 std::string child_name(comp.c_str(), l);
148 std::string attr_name, attr_value;
149
150 int n = 0;
151 if (bracket) {
152 std::string expr = unescape(bracket, '[', ']');
153 const char* p = expr.c_str();
154
155 n = atoi(p); // read index number
156
157 if (n)
158 n = n - 1; // convert into zero based index
159
160 const char* at = strchr(p, '@');
161
162 if (at) {
163 p = at + 1;
164 const char* equal = strchr(p, '=');
165
166 // read attribute name and value
167 if (equal) {
168 attr_name = unescape(p, equal-p);
169 attr_value = unescape(equal+1);
170 }
171 }
172 }
173
174 if (attr_name.empty())
175 // search n.th child node with specified name
176 node = node->find(child_name, n);
177 else
178 // search n.th child node with specified name and matching attribute value
179 node = node->find(child_name, attr_name, attr_value, n);
180
181 if (!node)
182 return NULL;
183
184 if (*path == '/')
185 ++path;
186 }
187
188 return node;
189 }
190
191 XMLNode* XMLNode::create_relative(const char* path)
192 {
193 XMLNode* node = this;
194
195 // parse relative path
196 while(*path) {
197 const char* slash = strchr(path, '/');
198 if (slash == path)
199 return NULL;
200
201 int l = slash? slash-path: strlen(path);
202 std::string comp(path, l);
203 path += l;
204
205 // look for [n] and [@attr_name="attr_value"] expressions in path components
206 const char* bracket = strchr(comp.c_str(), '[');
207 l = bracket? bracket-comp.c_str(): comp.length();
208 std::string child_name(comp.c_str(), l);
209 std::string attr_name, attr_value;
210
211 int n = 0;
212 if (bracket) {
213 std::string expr = unescape(bracket, '[', ']');
214 const char* p = expr.c_str();
215
216 n = atoi(p); // read index number
217
218 if (n)
219 n = n - 1; // convert into zero based index
220
221 const char* at = strchr(p, '@');
222
223 if (at) {
224 p = at + 1;
225 const char* equal = strchr(p, '=');
226
227 // read attribute name and value
228 if (equal) {
229 attr_name = unescape(p, equal-p);
230 attr_value = unescape(equal+1);
231 }
232 }
233 }
234
235 XMLNode* child;
236
237 if (attr_name.empty())
238 // search n.th child node with specified name
239 child = node->find(child_name, n);
240 else
241 // search n.th child node with specified name and matching attribute value
242 child = node->find(child_name, attr_name, attr_value, n);
243
244 if (!child) {
245 child = new XMLNode(child_name);
246 node->add_child(child);
247
248 if (!attr_name.empty())
249 (*node)[attr_name] = attr_value;
250 }
251
252 node = child;
253
254 if (*path == '/')
255 ++path;
256 }
257
258 return node;
259 }
260
261
262 /// read XML stream into XML tree below _pos
263 XML_Status XMLReaderBase::read()
264 {
265 XML_Status status = XML_STATUS_OK;
266
267 while(status == XML_STATUS_OK) {
268 char* buffer = (char*) XML_GetBuffer(_parser, BUFFER_LEN);
269
270 int l = read_buffer(buffer, BUFFER_LEN);
271 if (l < 0)
272 break;
273
274 status = XML_ParseBuffer(_parser, l, false);
275 }
276
277 if (status != XML_STATUS_ERROR)
278 status = XML_ParseBuffer(_parser, 0, true);
279
280 if (_pos->_children.empty())
281 _pos->_trailing.append(_content);
282 else
283 _pos->_children.back()->_trailing.append(_content);
284
285 _content.erase();
286
287 return status;
288 }
289
290
291 /// store XML version and encoding into XML reader
292 void XMLCALL XMLReaderBase::XML_XmlDeclHandler(void* userData, const XML_Char* version, const XML_Char* encoding, int standalone)
293 {
294 XMLReaderBase* pReader = (XMLReaderBase*) userData;
295
296 if (version)
297 pReader->_xml_version = version;
298
299 if (encoding)
300 pReader->_encoding = encoding;
301 }
302
303 /// notifications about XML start tag
304 void XMLCALL XMLReaderBase::XML_StartElementHandler(void* userData, const XML_Char* name, const XML_Char** atts)
305 {
306 XMLReaderBase* pReader = (XMLReaderBase*) userData;
307 XMLPos& pos = pReader->_pos;
308
309 // search for end of first line
310 const char* s = pReader->_content.c_str();
311 const char* p = s;
312 const char* e = p + pReader->_content.length();
313
314 for(; p<e; ++p)
315 if (*p == '\n') {
316 ++p;
317 break;
318 }
319
320 if (p != s)
321 if (pos->_children.empty()) { // no children in last node?
322 if (pReader->_last_tag == TAG_START)
323 pos->_content.append(s, p-s);
324 else if (pReader->_last_tag == TAG_END)
325 pos->_trailing.append(s, p-s);
326 // else TAG_NONE -> don't store white space in root node
327 } else
328 pos->_children.back()->_trailing.append(s, p-s);
329
330 std::string leading;
331
332 if (p != e)
333 leading.assign(p, e-p);
334
335 XMLNode* node = new XMLNode(String_from_XML_Char(name), leading);
336
337 pos.add_down(node);
338
339 while(*atts) {
340 const XML_Char* attr_name = *atts++;
341 const XML_Char* attr_value = *atts++;
342
343 (*node)[String_from_XML_Char(attr_name)] = String_from_XML_Char(attr_value);
344 }
345
346 pReader->_last_tag = TAG_START;
347 pReader->_content.erase();
348 }
349
350 /// notifications about XML end tag
351 void XMLCALL XMLReaderBase::XML_EndElementHandler(void* userData, const XML_Char* name)
352 {
353 XMLReaderBase* pReader = (XMLReaderBase*) userData;
354 XMLPos& pos = pReader->_pos;
355
356 // search for end of first line
357 const char* s = pReader->_content.c_str();
358 const char* p = s;
359 const char* e = p + pReader->_content.length();
360
361 for(; p<e; ++p)
362 if (*p == '\n') {
363 ++p;
364 break;
365 }
366
367 if (p != s)
368 if (pos->_children.empty()) // no children in current node?
369 pos->_content.append(s, p-s);
370 else
371 if (pReader->_last_tag == TAG_START)
372 pos->_content.append(s, p-s);
373 else
374 pos->_children.back()->_trailing.append(s, p-s);
375
376 if (p != e)
377 pos->_end_leading.assign(p, e-p);
378
379 pos.back();
380
381 pReader->_last_tag = TAG_END;
382 pReader->_content.erase();
383 }
384
385 /// store content, white space and comments
386 void XMLCALL XMLReaderBase::XML_DefaultHandler(void* userData, const XML_Char* s, int len)
387 {
388 XMLReaderBase* pReader = (XMLReaderBase*) userData;
389
390 pReader->_content.append(s, len);
391 }
392
393
394 std::string XMLReaderBase::get_error_string() const
395 {
396 XML_Error error = XML_GetErrorCode(_parser);
397
398 switch(error) {
399 case XML_ERROR_NONE: return "XML_ERROR_NONE";
400 case XML_ERROR_NO_MEMORY: return "XML_ERROR_NO_MEMORY";
401 case XML_ERROR_SYNTAX: return "XML_ERROR_SYNTAX";
402 case XML_ERROR_NO_ELEMENTS: return "XML_ERROR_NO_ELEMENTS";
403 case XML_ERROR_INVALID_TOKEN: return "XML_ERROR_INVALID_TOKEN";
404 case XML_ERROR_UNCLOSED_TOKEN: return "XML_ERROR_UNCLOSED_TOKEN";
405 case XML_ERROR_PARTIAL_CHAR: return "XML_ERROR_PARTIAL_CHAR";
406 case XML_ERROR_TAG_MISMATCH: return "XML_ERROR_TAG_MISMATCH";
407 case XML_ERROR_DUPLICATE_ATTRIBUTE: return "XML_ERROR_DUPLICATE_ATTRIBUTE";
408 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT: return "XML_ERROR_JUNK_AFTER_DOC_ELEMENT";
409 case XML_ERROR_PARAM_ENTITY_REF: return "XML_ERROR_PARAM_ENTITY_REF";
410 case XML_ERROR_UNDEFINED_ENTITY: return "XML_ERROR_UNDEFINED_ENTITY";
411 case XML_ERROR_RECURSIVE_ENTITY_REF: return "XML_ERROR_RECURSIVE_ENTITY_REF";
412 case XML_ERROR_ASYNC_ENTITY: return "XML_ERROR_ASYNC_ENTITY";
413 case XML_ERROR_BAD_CHAR_REF: return "XML_ERROR_BAD_CHAR_REF";
414 case XML_ERROR_BINARY_ENTITY_REF: return "XML_ERROR_BINARY_ENTITY_REF";
415 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF: return "XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF";
416 case XML_ERROR_MISPLACED_XML_PI: return "XML_ERROR_MISPLACED_XML_PI";
417 case XML_ERROR_UNKNOWN_ENCODING: return "XML_ERROR_UNKNOWN_ENCODING";
418 case XML_ERROR_INCORRECT_ENCODING: return "XML_ERROR_INCORRECT_ENCODING";
419 case XML_ERROR_UNCLOSED_CDATA_SECTION: return "XML_ERROR_UNCLOSED_CDATA_SECTION";
420 case XML_ERROR_EXTERNAL_ENTITY_HANDLING: return "XML_ERROR_EXTERNAL_ENTITY_HANDLING";
421 case XML_ERROR_NOT_STANDALONE: return "XML_ERROR_NOT_STANDALONE";
422 case XML_ERROR_UNEXPECTED_STATE: return "XML_ERROR_UNEXPECTED_STATE";
423 case XML_ERROR_ENTITY_DECLARED_IN_PE: return "XML_ERROR_ENTITY_DECLARED_IN_PE";
424 case XML_ERROR_FEATURE_REQUIRES_XML_DTD: return "XML_ERROR_FEATURE_REQUIRES_XML_DTD";
425 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING: return "XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING";
426 case XML_ERROR_UNBOUND_PREFIX: return "XML_ERROR_UNBOUND_PREFIX";
427 // EXPAT version >= 1.95.8
428 #if XML_MAJOR_VERSION>1 || (XML_MAJOR_VERSION==1 && XML_MINOR_VERSION>95) || (XML_MAJOR_VERSION==1 && XML_MINOR_VERSION==95 && XML_MICRO_VERSION>7)
429 case XML_ERROR_UNDECLARING_PREFIX: return "XML_ERROR_UNDECLARING_PREFIX";
430 case XML_ERROR_INCOMPLETE_PE: return "XML_ERROR_INCOMPLETE_PE";
431 case XML_ERROR_XML_DECL: return "XML_ERROR_XML_DECL";
432 case XML_ERROR_TEXT_DECL: return "XML_ERROR_TEXT_DECL";
433 case XML_ERROR_PUBLICID: return "XML_ERROR_PUBLICID";
434 case XML_ERROR_SUSPENDED: return "XML_ERROR_SUSPENDED";
435 case XML_ERROR_NOT_SUSPENDED: return "XML_ERROR_NOT_SUSPENDED";
436 case XML_ERROR_ABORTED: return "XML_ERROR_ABORTED";
437 case XML_ERROR_FINISHED: return "XML_ERROR_FINISHED";
438 case XML_ERROR_SUSPEND_PE: return "XML_ERROR_SUSPEND_PE";
439 //#endif
440 //#if XML_MAJOR_VERSION>=2
441 /* Added in 2.0. */
442 case XML_ERROR_RESERVED_PREFIX_XML: return "XML_ERROR_RESERVED_PREFIX_XML";
443 case XML_ERROR_RESERVED_PREFIX_XMLNS: return "XML_ERROR_RESERVED_PREFIX_XMLNS";
444 case XML_ERROR_RESERVED_NAMESPACE_URI: return "XML_ERROR_RESERVED_NAMESPACE_URI";
445 #endif
446 }
447
448 std::ostringstream out;
449
450 out << "XML parser error #" << error;
451
452 return out.str();
453 }
454
455
456 std::string EncodeXMLString(const XS_String& str)
457 {
458 LPCXSSTR s = str.c_str();
459 LPXSSTR buffer = (LPXSSTR)alloca(5*sizeof(XS_CHAR)*XS_len(s)); // worst case. "&amp;"
460 LPXSSTR o = buffer;
461
462 for(LPCXSSTR p=s; *p; ++p)
463 switch(*p) {
464 case '&':
465 *o++ = '&'; *o++ = 'a'; *o++ = 'm'; *o++ = 'p'; *o++ = ';';
466 break;
467
468 case '<':
469 *o++ = '&'; *o++ = 'l'; *o++ = 't'; *o++ = ';';
470 break;
471
472 case '>':
473 *o++ = '&'; *o++ = 'g'; *o++ = 't'; *o++ = ';';
474 break;
475
476 case '"':
477 *o++ = '&'; *o++ = 'q'; *o++ = 'u'; *o++ = 'o'; *o++ = 't'; *o++ = ';';
478 break;
479
480 case '\'':
481 *o++ = '&'; *o++ = 'a'; *o++ = 'p'; *o++ = 'o'; *o++ = 's'; *o++ = ';';
482 break;
483
484 default:
485 *o++ = *p;
486 }
487
488 #ifdef XS_STRING_UTF8
489 return XS_String(buffer, o-buffer);
490 #else
491 return get_utf8(buffer, o-buffer);
492 #endif
493 }
494
495 XS_String DecodeXMLString(const XS_String& str)
496 {
497 LPCXSSTR s = str.c_str();
498 LPXSSTR buffer = (LPXSSTR)alloca(sizeof(XS_CHAR)*XS_len(s));
499 LPXSSTR o = buffer;
500
501 for(LPCXSSTR p=s; *p; ++p)
502 if (*p == '&') {
503 if (!XS_nicmp(p+1, XS_TEXT("lt;"), 3)) {
504 *o++ = '<';
505 p += 3;
506 } else if (!XS_nicmp(p+1, XS_TEXT("gt;"), 3)) {
507 *o++ = '>';
508 p += 3;
509 } else if (!XS_nicmp(p+1, XS_TEXT("amp;"), 4)) {
510 *o++ = '&';
511 p += 4;
512 } else if (!XS_nicmp(p+1, XS_TEXT("quot;"), 5)) {
513 *o++ = '"';
514 p += 5;
515 } else if (!XS_nicmp(p+1, XS_TEXT("apos;"), 5)) {
516 *o++ = '\'';
517 p += 5;
518 } else
519 *o++ = *p;
520 } else
521 *o++ = *p;
522
523 return XS_String(buffer, o-buffer);
524 }
525
526
527 /// write node with children tree to output stream using original white space
528 void XMLNode::write_worker(std::ostream& out, int indent) const
529 {
530 out << _leading << '<' << EncodeXMLString(*this);
531
532 for(AttributeMap::const_iterator it=_attributes.begin(); it!=_attributes.end(); ++it)
533 out << ' ' << EncodeXMLString(it->first) << "=\"" << EncodeXMLString(it->second) << "\"";
534
535 if (!_children.empty() || !_content.empty()) {
536 out << '>' << _content;
537
538 for(Children::const_iterator it=_children.begin(); it!=_children.end(); ++it)
539 (*it)->write_worker(out, indent+1);
540
541 out << _end_leading << "</" << EncodeXMLString(*this) << '>';
542 } else
543 out << "/>";
544
545 out << _trailing;
546 }
547
548
549 /// pretty print node with children tree to output stream
550 void XMLNode::pretty_write_worker(std::ostream& out, int indent) const
551 {
552 for(int i=indent; i--; )
553 out << XML_INDENT_SPACE;
554
555 out << '<' << EncodeXMLString(*this);
556
557 for(AttributeMap::const_iterator it=_attributes.begin(); it!=_attributes.end(); ++it)
558 out << ' ' << EncodeXMLString(it->first) << "=\"" << EncodeXMLString(it->second) << "\"";
559
560 if (!_children.empty() || !_content.empty()) {
561 out << ">\n";
562
563 for(Children::const_iterator it=_children.begin(); it!=_children.end(); ++it)
564 (*it)->pretty_write_worker(out, indent+1);
565
566 for(int i=indent; i--; )
567 out << XML_INDENT_SPACE;
568
569 out << "</" << EncodeXMLString(*this) << ">\n";
570 } else
571 out << "/>\n";
572 }
573
574
575 /// write node with children tree to output stream using smart formating
576 void XMLNode::smart_write_worker(std::ostream& out, int indent) const
577 {
578 if (_leading.empty())
579 for(int i=indent; i--; )
580 out << XML_INDENT_SPACE;
581 else
582 out << _leading;
583
584 out << '<' << EncodeXMLString(*this);
585
586 for(AttributeMap::const_iterator it=_attributes.begin(); it!=_attributes.end(); ++it)
587 out << ' ' << EncodeXMLString(it->first) << "=\"" << EncodeXMLString(it->second) << "\"";
588
589 if (_children.empty() && _content.empty())
590 out << "/>";
591 else {
592 out << '>';
593
594 if (_content.empty())
595 out << '\n';
596 else
597 out << _content;
598
599 Children::const_iterator it = _children.begin();
600
601 if (it != _children.end()) {
602 for(; it!=_children.end(); ++it)
603 (*it)->smart_write_worker(out, indent+1);
604
605 if (_end_leading.empty())
606 for(int i=indent; i--; )
607 out << XML_INDENT_SPACE;
608 else
609 out << _end_leading;
610 } else
611 out << _end_leading;
612
613 out << "</" << EncodeXMLString(*this) << '>';
614 }
615
616 if (_trailing.empty())
617 out << '\n';
618 else
619 out << _trailing;
620 }
621
622
623 } // namespace XMLStorage