// Copyright (c) 2004, 2005 Martin Fuchs <martin-fuchs@gmx.net>
/*
  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
  * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.
*/
//#include "xmlstorage.h"
43 // work around GCC's wide string constant bug
45 const LPCXSSTR
XMLStorage::XS_TRUE
= XS_TEXT("true");
46 const LPCXSSTR
XMLStorage::XS_FALSE
= XS_TEXT("false");
47 const LPCXSSTR
XMLStorage::XS_NUMBERFMT
= XS_TEXT("%d");
51 namespace XMLStorage
{
/// Strip one pair of enclosing delimiters from a C string.
///
/// @param s  NUL-terminated input string
/// @param b  opening delimiter to look for (default: double quote)
/// @param e  closing delimiter to look for (default: double quote)
/// @return   the text between the delimiters when s starts with b and ends
///           with e; otherwise an unmodified copy of s
///
/// NOTE(review): only the end-of-string test is visible in the recovered
/// listing; the test of the first character against b is reinstated here.
static std::string unescape(const char* s, char b='"', char e='"')
{
    const char* end = s + strlen(s);

    // Both delimiters have to be distinct characters of the input: a string
    // consisting of a single delimiter would satisfy the begin and the end
    // test at once and leave end in front of s (negative length).
    if (end-s >= 2 && *s == b && end[-1] == e) {
        ++s;
        --end;
    }

    return std::string(s, end-s);
}
/// Strip one pair of enclosing delimiters from a character range.
///
/// @param s  start of the input; need not be NUL-terminated at s+l
/// @param l  number of characters to consider
/// @param b  opening delimiter to look for (default: double quote)
/// @param e  closing delimiter to look for (default: double quote)
/// @return   the text between the delimiters when the range starts with b and
///           ends with e; otherwise an unmodified copy of the range
///
/// NOTE(review): only the end-of-range test is visible in the recovered
/// listing; the test of the first character against b is reinstated here.
static std::string unescape(const char* s, int l, char b='"', char e='"')
{
    const char* end = s + l;

    // Require two distinct delimiter characters. This also keeps *s from being
    // inspected for an empty range and prevents a negative result length for
    // a range holding nothing but one delimiter.
    if (l >= 2 && *s == b && end[-1] == e) {
        ++s;
        --end;
    }

    return std::string(s, end-s);
}
89 /// move XPath like to position in XML tree
90 bool XMLPos::go(const char* path
)
94 // Is this an absolute path?
100 node
= node
->find_relative(path
);
109 /// move XPath like to position in XML tree
110 bool const_XMLPos::go(const char* path
)
112 const XMLNode
* node
= _cur
;
114 // Is this an absolute path?
120 node
= node
->find_relative(path
);
130 const XMLNode
* XMLNode::find_relative(const char* path
) const
132 const XMLNode
* node
= this;
134 // parse relative path
136 const char* slash
= strchr(path
, '/');
140 int l
= slash
? slash
-path
: strlen(path
);
141 std::string
comp(path
, l
);
144 // look for [n] and [@attr_name="attr_value"] expressions in path components
145 const char* bracket
= strchr(comp
.c_str(), '[');
146 l
= bracket
? bracket
-comp
.c_str(): comp
.length();
147 std::string
child_name(comp
.c_str(), l
);
148 std::string attr_name
, attr_value
;
152 std::string expr
= unescape(bracket
, '[', ']');
153 const char* p
= expr
.c_str();
155 n
= atoi(p
); // read index number
158 n
= n
- 1; // convert into zero based index
160 const char* at
= strchr(p
, '@');
164 const char* equal
= strchr(p
, '=');
166 // read attribute name and value
168 attr_name
= unescape(p
, equal
-p
);
169 attr_value
= unescape(equal
+1);
174 if (attr_name
.empty())
175 // search n.th child node with specified name
176 node
= node
->find(child_name
, n
);
178 // search n.th child node with specified name and matching attribute value
179 node
= node
->find(child_name
, attr_name
, attr_value
, n
);
191 XMLNode
* XMLNode::create_relative(const char* path
)
193 XMLNode
* node
= this;
195 // parse relative path
197 const char* slash
= strchr(path
, '/');
201 int l
= slash
? slash
-path
: strlen(path
);
202 std::string
comp(path
, l
);
205 // look for [n] and [@attr_name="attr_value"] expressions in path components
206 const char* bracket
= strchr(comp
.c_str(), '[');
207 l
= bracket
? bracket
-comp
.c_str(): comp
.length();
208 std::string
child_name(comp
.c_str(), l
);
209 std::string attr_name
, attr_value
;
213 std::string expr
= unescape(bracket
, '[', ']');
214 const char* p
= expr
.c_str();
216 n
= atoi(p
); // read index number
219 n
= n
- 1; // convert into zero based index
221 const char* at
= strchr(p
, '@');
225 const char* equal
= strchr(p
, '=');
227 // read attribute name and value
229 attr_name
= unescape(p
, equal
-p
);
230 attr_value
= unescape(equal
+1);
237 if (attr_name
.empty())
238 // search n.th child node with specified name
239 child
= node
->find(child_name
, n
);
241 // search n.th child node with specified name and matching attribute value
242 child
= node
->find(child_name
, attr_name
, attr_value
, n
);
245 child
= new XMLNode(child_name
);
246 node
->add_child(child
);
248 if (!attr_name
.empty())
249 (*node
)[attr_name
] = attr_value
;
262 /// read XML stream into XML tree below _pos
263 XML_Status
XMLReaderBase::read()
265 XML_Status status
= XML_STATUS_OK
;
267 while(status
== XML_STATUS_OK
) {
268 char* buffer
= (char*) XML_GetBuffer(_parser
, BUFFER_LEN
);
270 int l
= read_buffer(buffer
, BUFFER_LEN
);
274 status
= XML_ParseBuffer(_parser
, l
, false);
277 if (status
!= XML_STATUS_ERROR
)
278 status
= XML_ParseBuffer(_parser
, 0, true);
280 if (_pos
->_children
.empty())
281 _pos
->_trailing
.append(_content
);
283 _pos
->_children
.back()->_trailing
.append(_content
);
291 /// store XML version and encoding into XML reader
292 void XMLCALL
XMLReaderBase::XML_XmlDeclHandler(void* userData
, const XML_Char
* version
, const XML_Char
* encoding
, int standalone
)
294 XMLReaderBase
* pReader
= (XMLReaderBase
*) userData
;
297 pReader
->_xml_version
= version
;
300 pReader
->_encoding
= encoding
;
303 /// notifications about XML start tag
304 void XMLCALL
XMLReaderBase::XML_StartElementHandler(void* userData
, const XML_Char
* name
, const XML_Char
** atts
)
306 XMLReaderBase
* pReader
= (XMLReaderBase
*) userData
;
307 XMLPos
& pos
= pReader
->_pos
;
309 // search for end of first line
310 const char* s
= pReader
->_content
.c_str();
312 const char* e
= p
+ pReader
->_content
.length();
321 if (pos
->_children
.empty()) { // no children in last node?
322 if (pReader
->_last_tag
== TAG_START
)
323 pos
->_content
.append(s
, p
-s
);
324 else if (pReader
->_last_tag
== TAG_END
)
325 pos
->_trailing
.append(s
, p
-s
);
326 // else TAG_NONE -> don't store white space in root node
328 pos
->_children
.back()->_trailing
.append(s
, p
-s
);
333 leading
.assign(p
, e
-p
);
335 XMLNode
* node
= new XMLNode(String_from_XML_Char(name
), leading
);
340 const XML_Char
* attr_name
= *atts
++;
341 const XML_Char
* attr_value
= *atts
++;
343 (*node
)[String_from_XML_Char(attr_name
)] = String_from_XML_Char(attr_value
);
346 pReader
->_last_tag
= TAG_START
;
347 pReader
->_content
.erase();
350 /// notifications about XML end tag
351 void XMLCALL
XMLReaderBase::XML_EndElementHandler(void* userData
, const XML_Char
* name
)
353 XMLReaderBase
* pReader
= (XMLReaderBase
*) userData
;
354 XMLPos
& pos
= pReader
->_pos
;
356 // search for end of first line
357 const char* s
= pReader
->_content
.c_str();
359 const char* e
= p
+ pReader
->_content
.length();
368 if (pos
->_children
.empty()) // no children in current node?
369 pos
->_content
.append(s
, p
-s
);
371 if (pReader
->_last_tag
== TAG_START
)
372 pos
->_content
.append(s
, p
-s
);
374 pos
->_children
.back()->_trailing
.append(s
, p
-s
);
377 pos
->_end_leading
.assign(p
, e
-p
);
381 pReader
->_last_tag
= TAG_END
;
382 pReader
->_content
.erase();
385 /// store content, white space and comments
386 void XMLCALL
XMLReaderBase::XML_DefaultHandler(void* userData
, const XML_Char
* s
, int len
)
388 XMLReaderBase
* pReader
= (XMLReaderBase
*) userData
;
390 pReader
->_content
.append(s
, len
);
394 std::string
XMLReaderBase::get_error_string() const
396 XML_Error error
= XML_GetErrorCode(_parser
);
399 case XML_ERROR_NONE
: return "XML_ERROR_NONE";
400 case XML_ERROR_NO_MEMORY
: return "XML_ERROR_NO_MEMORY";
401 case XML_ERROR_SYNTAX
: return "XML_ERROR_SYNTAX";
402 case XML_ERROR_NO_ELEMENTS
: return "XML_ERROR_NO_ELEMENTS";
403 case XML_ERROR_INVALID_TOKEN
: return "XML_ERROR_INVALID_TOKEN";
404 case XML_ERROR_UNCLOSED_TOKEN
: return "XML_ERROR_UNCLOSED_TOKEN";
405 case XML_ERROR_PARTIAL_CHAR
: return "XML_ERROR_PARTIAL_CHAR";
406 case XML_ERROR_TAG_MISMATCH
: return "XML_ERROR_TAG_MISMATCH";
407 case XML_ERROR_DUPLICATE_ATTRIBUTE
: return "XML_ERROR_DUPLICATE_ATTRIBUTE";
408 case XML_ERROR_JUNK_AFTER_DOC_ELEMENT
: return "XML_ERROR_JUNK_AFTER_DOC_ELEMENT";
409 case XML_ERROR_PARAM_ENTITY_REF
: return "XML_ERROR_PARAM_ENTITY_REF";
410 case XML_ERROR_UNDEFINED_ENTITY
: return "XML_ERROR_UNDEFINED_ENTITY";
411 case XML_ERROR_RECURSIVE_ENTITY_REF
: return "XML_ERROR_RECURSIVE_ENTITY_REF";
412 case XML_ERROR_ASYNC_ENTITY
: return "XML_ERROR_ASYNC_ENTITY";
413 case XML_ERROR_BAD_CHAR_REF
: return "XML_ERROR_BAD_CHAR_REF";
414 case XML_ERROR_BINARY_ENTITY_REF
: return "XML_ERROR_BINARY_ENTITY_REF";
415 case XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF
: return "XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF";
416 case XML_ERROR_MISPLACED_XML_PI
: return "XML_ERROR_MISPLACED_XML_PI";
417 case XML_ERROR_UNKNOWN_ENCODING
: return "XML_ERROR_UNKNOWN_ENCODING";
418 case XML_ERROR_INCORRECT_ENCODING
: return "XML_ERROR_INCORRECT_ENCODING";
419 case XML_ERROR_UNCLOSED_CDATA_SECTION
: return "XML_ERROR_UNCLOSED_CDATA_SECTION";
420 case XML_ERROR_EXTERNAL_ENTITY_HANDLING
: return "XML_ERROR_EXTERNAL_ENTITY_HANDLING";
421 case XML_ERROR_NOT_STANDALONE
: return "XML_ERROR_NOT_STANDALONE";
422 case XML_ERROR_UNEXPECTED_STATE
: return "XML_ERROR_UNEXPECTED_STATE";
423 case XML_ERROR_ENTITY_DECLARED_IN_PE
: return "XML_ERROR_ENTITY_DECLARED_IN_PE";
424 case XML_ERROR_FEATURE_REQUIRES_XML_DTD
: return "XML_ERROR_FEATURE_REQUIRES_XML_DTD";
425 case XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING
: return "XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING";
426 case XML_ERROR_UNBOUND_PREFIX
: return "XML_ERROR_UNBOUND_PREFIX";
427 // EXPAT version >= 1.95.8
428 #if XML_MAJOR_VERSION>1 || (XML_MAJOR_VERSION==1 && XML_MINOR_VERSION>95) || (XML_MAJOR_VERSION==1 && XML_MINOR_VERSION==95 && XML_MICRO_VERSION>7)
429 case XML_ERROR_UNDECLARING_PREFIX
: return "XML_ERROR_UNDECLARING_PREFIX";
430 case XML_ERROR_INCOMPLETE_PE
: return "XML_ERROR_INCOMPLETE_PE";
431 case XML_ERROR_XML_DECL
: return "XML_ERROR_XML_DECL";
432 case XML_ERROR_TEXT_DECL
: return "XML_ERROR_TEXT_DECL";
433 case XML_ERROR_PUBLICID
: return "XML_ERROR_PUBLICID";
434 case XML_ERROR_SUSPENDED
: return "XML_ERROR_SUSPENDED";
435 case XML_ERROR_NOT_SUSPENDED
: return "XML_ERROR_NOT_SUSPENDED";
436 case XML_ERROR_ABORTED
: return "XML_ERROR_ABORTED";
437 case XML_ERROR_FINISHED
: return "XML_ERROR_FINISHED";
438 case XML_ERROR_SUSPEND_PE
: return "XML_ERROR_SUSPEND_PE";
440 //#if XML_MAJOR_VERSION>=2
442 case XML_ERROR_RESERVED_PREFIX_XML
: return "XML_ERROR_RESERVED_PREFIX_XML";
443 case XML_ERROR_RESERVED_PREFIX_XMLNS
: return "XML_ERROR_RESERVED_PREFIX_XMLNS";
444 case XML_ERROR_RESERVED_NAMESPACE_URI
: return "XML_ERROR_RESERVED_NAMESPACE_URI";
448 std::ostringstream out
;
450 out
<< "XML parser error #" << error
;
456 std::string
EncodeXMLString(const XS_String
& str
)
458 LPCXSSTR s
= str
.c_str();
459 LPXSSTR buffer
= (LPXSSTR
)alloca(5*sizeof(XS_CHAR
)*XS_len(s
)); // worst case. "&"
462 for(LPCXSSTR p
=s
; *p
; ++p
)
465 *o
++ = '&'; *o
++ = 'a'; *o
++ = 'm'; *o
++ = 'p'; *o
++ = ';';
469 *o
++ = '&'; *o
++ = 'l'; *o
++ = 't'; *o
++ = ';';
473 *o
++ = '&'; *o
++ = 'g'; *o
++ = 't'; *o
++ = ';';
477 *o
++ = '&'; *o
++ = 'q'; *o
++ = 'u'; *o
++ = 'o'; *o
++ = 't'; *o
++ = ';';
481 *o
++ = '&'; *o
++ = 'a'; *o
++ = 'p'; *o
++ = 'o'; *o
++ = 's'; *o
++ = ';';
488 #ifdef XS_STRING_UTF8
489 return XS_String(buffer
, o
-buffer
);
491 return get_utf8(buffer
, o
-buffer
);
495 XS_String
DecodeXMLString(const XS_String
& str
)
497 LPCXSSTR s
= str
.c_str();
498 LPXSSTR buffer
= (LPXSSTR
)alloca(sizeof(XS_CHAR
)*XS_len(s
));
501 for(LPCXSSTR p
=s
; *p
; ++p
)
503 if (!XS_nicmp(p
+1, XS_TEXT("lt;"), 3)) {
506 } else if (!XS_nicmp(p
+1, XS_TEXT("gt;"), 3)) {
509 } else if (!XS_nicmp(p
+1, XS_TEXT("amp;"), 4)) {
512 } else if (!XS_nicmp(p
+1, XS_TEXT("quot;"), 5)) {
515 } else if (!XS_nicmp(p
+1, XS_TEXT("apos;"), 5)) {
523 return XS_String(buffer
, o
-buffer
);
527 /// write node with children tree to output stream using original white space
528 void XMLNode::write_worker(std::ostream
& out
, int indent
) const
530 out
<< _leading
<< '<' << EncodeXMLString(*this);
532 for(AttributeMap::const_iterator it
=_attributes
.begin(); it
!=_attributes
.end(); ++it
)
533 out
<< ' ' << EncodeXMLString(it
->first
) << "=\"" << EncodeXMLString(it
->second
) << "\"";
535 if (!_children
.empty() || !_content
.empty()) {
536 out
<< '>' << _content
;
538 for(Children::const_iterator it
=_children
.begin(); it
!=_children
.end(); ++it
)
539 (*it
)->write_worker(out
, indent
+1);
541 out
<< _end_leading
<< "</" << EncodeXMLString(*this) << '>';
549 /// pretty print node with children tree to output stream
550 void XMLNode::pretty_write_worker(std::ostream
& out
, int indent
) const
552 for(int i
=indent
; i
--; )
553 out
<< XML_INDENT_SPACE
;
555 out
<< '<' << EncodeXMLString(*this);
557 for(AttributeMap::const_iterator it
=_attributes
.begin(); it
!=_attributes
.end(); ++it
)
558 out
<< ' ' << EncodeXMLString(it
->first
) << "=\"" << EncodeXMLString(it
->second
) << "\"";
560 if (!_children
.empty() || !_content
.empty()) {
563 for(Children::const_iterator it
=_children
.begin(); it
!=_children
.end(); ++it
)
564 (*it
)->pretty_write_worker(out
, indent
+1);
566 for(int i
=indent
; i
--; )
567 out
<< XML_INDENT_SPACE
;
569 out
<< "</" << EncodeXMLString(*this) << ">\n";
575 /// write node with children tree to output stream using smart formating
576 void XMLNode::smart_write_worker(std::ostream
& out
, int indent
) const
578 if (_leading
.empty())
579 for(int i
=indent
; i
--; )
580 out
<< XML_INDENT_SPACE
;
584 out
<< '<' << EncodeXMLString(*this);
586 for(AttributeMap::const_iterator it
=_attributes
.begin(); it
!=_attributes
.end(); ++it
)
587 out
<< ' ' << EncodeXMLString(it
->first
) << "=\"" << EncodeXMLString(it
->second
) << "\"";
589 if (_children
.empty() && _content
.empty())
594 if (_content
.empty())
599 Children::const_iterator it
= _children
.begin();
601 if (it
!= _children
.end()) {
602 for(; it
!=_children
.end(); ++it
)
603 (*it
)->smart_write_worker(out
, indent
+1);
605 if (_end_leading
.empty())
606 for(int i
=indent
; i
--; )
607 out
<< XML_INDENT_SPACE
;
613 out
<< "</" << EncodeXMLString(*this) << '>';
616 if (_trailing
.empty())
623 } // namespace XMLStorage