5d2b25ecf46200b903f377217cef9c2f435e5296
3 // XML storage C++ classes version 1.3
5 // Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Martin Fuchs <martin-fuchs@gmx.net>
8 /// \file xmlstorage.cpp
9 /// XMLStorage implementation file
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions are met:
19 * Redistributions of source code must retain the above copyright
20 notice, this list of conditions and the following disclaimer.
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in
23 the documentation and/or other materials provided with the
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
43 #define XS_NO_COMMENT // no #pragma comment(lib, ...) statements in .lib files to enable static linking
46 //#include "xmlstorage.h"
49 namespace XMLStorage
{
52 // work around GCC's wide string constant bug
54 const LPCXSSTR XS_EMPTY
= XS_EMPTY_STR
;
55 const LPCXSSTR XS_TRUE
= XS_TRUE_STR
;
56 const LPCXSSTR XS_FALSE
= XS_FALSE_STR
;
57 const LPCXSSTR XS_INTFMT
= XS_INTFMT_STR
;
58 const LPCXSSTR XS_FLOATFMT
= XS_FLOATFMT_STR
;
61 const XS_String XS_KEY
= XS_KEY_STR
;
62 const XS_String XS_VALUE
= XS_VALUE_STR
;
63 const XS_String XS_PROPERTY
= XS_PROPERTY_STR
;
66 /// remove escape characters from zero terminated string
67 static std::string
unescape(const char* s
, char b
, char e
)
69 const char* end
= s
+ strlen(s
);
74 // if (end>s && end[-1]==e)
78 if (end
>s
&& end
[-1]==e
)
81 return std::string(s
, end
-s
);
84 inline std::string
unescape(const char* s
)
86 return unescape(s
, '"', '"');
89 /// remove escape characters from string with specified length
90 static std::string
unescape(const char* s
, size_t l
, char b
, char e
)
92 const char* end
= s
+ l
;
97 // if (end>s && end[-1]==e)
101 if (end
>s
&& end
[-1]==e
)
104 return std::string(s
, end
-s
);
107 inline std::string
unescape(const char* s
, size_t l
)
109 return unescape(s
, l
, '"', '"');
113 /// move to the position defined by xpath in XML tree
114 bool XMLPos::go(const XPath
& xpath
)
116 XMLNode
* node
= xpath
._absolute
? _root
: _cur
;
118 node
= node
->find_relative(xpath
);
127 /// move to the position defined by xpath in XML tree
128 bool const_XMLPos::go(const XPath
& xpath
)
130 const XMLNode
* node
= xpath
._absolute
? _root
: _cur
;
132 node
= node
->find_relative(xpath
);
142 const char* XPathElement::parse(const char* path
)
144 const char* slash
= strchr(path
, '/');
148 size_t l
= slash
? slash
-path
: strlen(path
);
149 std::string
comp(path
, l
);
152 // look for [n] and [@attr_name="attr_value"] expressions in path components
153 const char* bracket
= strchr(comp
.c_str(), '[');
154 l
= bracket
? bracket
-comp
.c_str(): comp
.length();
155 _child_name
.assign(comp
.c_str(), l
);
159 std::string expr
= unescape(bracket
, '[', ']');
160 const char* p
= expr
.c_str();
162 n
= atoi(p
); // read index number
165 _child_idx
= n
- 1; // convert into zero based index
167 const char* at
= strchr(p
, '@');
171 const char* equal
= strchr(p
, '=');
173 // read attribute name and value
175 _attr_name
= unescape(p
, equal
-p
);
176 _attr_value
= unescape(equal
+1);
184 XMLNode
* XPathElement::find(XMLNode
* node
) const
188 for(XMLNode::Children::const_iterator it
=node
->_children
.begin(); it
!=node
->_children
.end(); ++it
)
189 if (matches(**it
, n
))
195 const XMLNode
* XPathElement::const_find(const XMLNode
* node
) const
199 for(XMLNode::Children::const_iterator it
=node
->_children
.begin(); it
!=node
->_children
.end(); ++it
)
200 if (matches(**it
, n
))
206 bool XPathElement::matches(const XMLNode
& node
, int& n
) const
208 if (node
!= _child_name
)
209 if (_child_name
!= XS_TEXT("*")) // use asterisk as wildcard
212 if (!_attr_name
.empty())
213 if (node
.get(_attr_name
) != _attr_value
)
216 if (_child_idx
== -1)
218 else if (n
++ == _child_idx
)
225 void XPath::init(const char* path
)
227 // Is this an absolute path?
238 path
= elem
.parse(path
);
251 const XMLNode
* XMLNode::find_relative(const XPath
& xpath
) const
253 const XMLNode
* node
= this;
255 for(XPath::const_iterator it
=xpath
.begin(); it
!=xpath
.end(); ++it
) {
256 node
= it
->const_find(node
);
265 XMLNode
* XMLNode::find_relative(const XPath
& xpath
)
267 XMLNode
* node
= this;
269 for(XPath::const_iterator it
=xpath
.begin(); it
!=xpath
.end(); ++it
) {
270 node
= it
->find(node
);
279 XMLNode
* XMLNode::create_relative(const XPath
& xpath
)
281 XMLNode
* node
= this;
283 for(XPath::const_iterator it
=xpath
.begin(); it
!=xpath
.end(); ++it
) {
284 XMLNode
* child
= it
->find(node
);
287 child
= new XMLNode(it
->_child_name
);
288 node
->add_child(child
);
290 if (!it
->_attr_name
.empty())
291 (*this)[it
->_attr_name
] = it
->_attr_value
;
300 /// count the nodes matching the given relative XPath expression
301 int XMLNode::count(XPath::const_iterator from
, const XPath::const_iterator
& to
) const
303 const XPathElement
& elem
= *from
++;
307 for(XMLNode::Children::const_iterator it
=_children
.begin(); it
!=_children
.end(); ++it
)
308 if (elem
.matches(**it
, n
)) {
311 cnt
+= (*it
)->count(from
, to
);
313 // increment match counter
320 /// copy matching tree nodes using the given XPath filter expression
321 bool XMLNode::filter(const XPath
& xpath
, XMLNode
& target
) const
323 XMLNode
* ret
= filter(xpath
.begin(), xpath
.end());
326 // move returned nodes to target node
327 target
._children
.move(ret
->_children
);
328 target
._attributes
= ret
->_attributes
;
337 /// create a new node tree using the given XPath filter expression
338 XMLNode
* XMLNode::filter(XPath::const_iterator from
, const XPath::const_iterator
& to
) const
340 XMLNode
* copy
= NULL
;
342 const XPathElement
& elem
= *from
++;
346 for(XMLNode::Children::const_iterator it
=_children
.begin(); it
!=_children
.end(); ++it
)
347 if (elem
.matches(**it
, n
)) {
349 copy
= new XMLNode(*this, XMLNode::COPY_NOCHILDREN
);
352 XMLNode
* ret
= (*it
)->filter(from
, to
);
355 copy
->add_child(ret
);
359 copy
->add_child(new XMLNode(**it
, XMLNode::COPY_NOCHILDREN
));
373 /// encode XML string literals
374 std::string
EncodeXMLString(const XS_String
& str
, bool cdata
)
376 LPCXSSTR s
= str
.c_str();
377 size_t l
= XS_len(s
);
380 // encode the whole string in a CDATA section
381 std::string ret
= CDATA_START
;
383 #ifdef XS_STRING_UTF8
386 ret
+= get_utf8(str
);
392 } else if (l
<= BUFFER_LEN
) {
393 LPXSSTR buffer
= (LPXSSTR
)alloca(6*sizeof(XS_CHAR
)*XS_len(s
)); // worst case """ / "'"
396 for(LPCXSSTR p
=s
; *p
; ++p
)
399 *o
++ = '&'; *o
++ = 'a'; *o
++ = 'm'; *o
++ = 'p'; *o
++ = ';'; // "&"
403 *o
++ = '&'; *o
++ = 'l'; *o
++ = 't'; *o
++ = ';'; // "<"
407 *o
++ = '&'; *o
++ = 'g'; *o
++ = 't'; *o
++ = ';'; // ">"
411 *o
++ = '&'; *o
++ = 'q'; *o
++ = 'u'; *o
++ = 'o'; *o
++ = 't'; *o
++ = ';'; // """
415 *o
++ = '&'; *o
++ = 'a'; *o
++ = 'p'; *o
++ = 'o'; *o
++ = 's'; *o
++ = ';'; // "'"
419 if ((unsigned)*p
<0x20 && *p
!='\t' && *p
!='\r' && *p
!='\n') {
421 sprintf(b
, "&#%d;", (unsigned)*p
);
422 for(const char*q
=b
; *q
; )
428 #ifdef XS_STRING_UTF8
429 return XS_String(buffer
, o
-buffer
);
431 return get_utf8(buffer
, o
-buffer
);
433 } else { // l > BUFFER_LEN
434 // alternative code path for larger strings: writes to an ostringstream
435 // instead of preallocating the output buffer with alloca()
436 fast_ostringstream out
;
438 LPCXSSTR s
= str
.c_str();
440 for(LPCXSSTR p
=s
; *p
; ++p
)
463 if ((unsigned)*p
<0x20 && *p
!='\t' && *p
!='\r' && *p
!='\n')
464 out
<< "&#" << (unsigned)*p
<< ";";
469 #ifdef XS_STRING_UTF8
470 return XS_String(out
.str());
472 return get_utf8(out
.str());
477 /// decode XML string literals
478 XS_String
DecodeXMLString(const std::string
& str
)
480 #ifdef XS_STRING_UTF8
481 const XS_String
& str_utf8
= str
;
484 assign_utf8(str_utf8
, str
.c_str(), str
.length());
487 LPCXSSTR s
= str_utf8
.c_str();
488 LPXSSTR buffer
= (LPXSSTR
)alloca(sizeof(XS_CHAR
)*XS_len(s
));
491 for(LPCXSSTR p
=s
; *p
; ++p
)
493 if (!XS_nicmp(p
+1, XS_TEXT("lt;"), 3)) {
496 } else if (!XS_nicmp(p
+1, XS_TEXT("gt;"), 3)) {
499 } else if (!XS_nicmp(p
+1, XS_TEXT("amp;"), 4)) {
502 } else if (!XS_nicmp(p
+1, XS_TEXT("quot;"), 5)) {
505 } else if (!XS_nicmp(p
+1, XS_TEXT("apos;"), 5)) {
508 } else //@@ maybe decode "&#xx;" special characters
510 } else if (*p
=='<' && !XS_nicmp(p
+1,XS_TEXT("![CDATA["),8)) {
511 LPCXSSTR e
= XS_strstr(p
+9, XS_TEXT(CDATA_END
));
523 return XS_String(buffer
, o
-buffer
);
527 /// write node with children tree to output stream using original white space
528 void XMLNode::original_write_worker(std::ostream
& out
) const
530 out
<< _leading
<< '<' << EncodeXMLString(*this);
532 for(AttributeMap::const_iterator it
=_attributes
.begin(); it
!=_attributes
.end(); ++it
)
533 out
<< ' ' << EncodeXMLString(it
->first
) << "=\"" << EncodeXMLString(it
->second
) << "\"";
535 if (!_children
.empty() || !_content
.empty()) {
539 out
<< CDATA_START
<< _content
<< CDATA_END
;
543 for(Children::const_iterator it
=_children
.begin(); it
!=_children
.end(); ++it
)
544 (*it
)->original_write_worker(out
);
546 out
<< _end_leading
<< "</" << EncodeXMLString(*this) << '>';
554 /// print node without any white space
555 void XMLNode::plain_write_worker(std::ostream
& out
) const
557 out
<< '<' << EncodeXMLString(*this);
559 for(AttributeMap::const_iterator it
=_attributes
.begin(); it
!=_attributes
.end(); ++it
)
560 out
<< ' ' << EncodeXMLString(it
->first
) << "=\"" << EncodeXMLString(it
->second
) << "\"";
562 // strip leading white space from content
563 const char* content
= _content
.c_str();
564 while(isspace((unsigned char)*content
)) ++content
;
566 if (!_children
.empty() || *content
) {
567 out
<< ">" << content
;
569 for(Children::const_iterator it
=_children
.begin(); it
!=_children
.end(); ++it
)
570 (*it
)->plain_write_worker(out
);
572 out
<< "</" << EncodeXMLString(*this) << ">";
578 /// pretty print node with children tree to output stream
579 void XMLNode::pretty_write_worker(std::ostream
& out
, const XMLFormat
& format
, int indent
) const
581 for(int i
=indent
; i
--; )
582 out
<< XML_INDENT_SPACE
;
584 out
<< '<' << EncodeXMLString(*this);
586 for(AttributeMap::const_iterator it
=_attributes
.begin(); it
!=_attributes
.end(); ++it
)
587 out
<< ' ' << EncodeXMLString(it
->first
) << "=\"" << EncodeXMLString(it
->second
) << "\"";
589 // strip leading white space from content
590 const char* content
= _content
.c_str();
591 while(isspace((unsigned char)*content
)) ++content
;
593 if (!_children
.empty() || *content
) {
594 out
<< '>' << content
;
596 if (!_children
.empty())
599 for(Children::const_iterator it
=_children
.begin(); it
!=_children
.end(); ++it
)
600 (*it
)->pretty_write_worker(out
, format
, indent
+1);
602 for(int i
=indent
; i
--; )
603 out
<< XML_INDENT_SPACE
;
605 out
<< "</" << EncodeXMLString(*this) << '>' << format
._endl
;
607 out
<< "/>" << format
._endl
;
611 /// write node with children tree to output stream using smart formatting
612 void XMLNode::smart_write_worker(std::ostream
& out
, const XMLFormat
& format
, int indent
) const
614 // strip the first line feed from _leading
615 const char* leading
= _leading
.c_str();
616 if (*leading
== '\n') ++leading
;
619 for(int i
=indent
; i
--; )
620 out
<< XML_INDENT_SPACE
;
624 out
<< '<' << EncodeXMLString(*this);
626 for(AttributeMap::const_iterator it
=_attributes
.begin(); it
!=_attributes
.end(); ++it
)
627 out
<< ' ' << EncodeXMLString(it
->first
) << "=\"" << EncodeXMLString(it
->second
) << "\"";
629 // strip leading white space from content
630 const char* content
= _content
.c_str();
631 while(isspace((unsigned char)*content
)) ++content
;
633 if (_children
.empty() && !*content
)
639 out
<< CDATA_START
<< _content
<< CDATA_END
;
645 Children::const_iterator it
= _children
.begin();
647 if (it
!= _children
.end()) {
648 for(; it
!=_children
.end(); ++it
)
649 (*it
)->smart_write_worker(out
, format
, indent
+1);
651 // strip the first line feed from _end_leading
652 const char* end_leading
= _end_leading
.c_str();
653 if (*end_leading
== '\n') ++end_leading
;
656 for(int i
=indent
; i
--; )
657 out
<< XML_INDENT_SPACE
;
663 out
<< "</" << EncodeXMLString(*this) << '>';
666 if (_trailing
.empty())
673 std::ostream
& operator<<(std::ostream
& out
, const XMLError
& err
)
675 out
<< err
._systemId
<< "(" << err
._line
<< ") [column " << err
._column
<< "] : "
682 const char* get_xmlsym_end_utf8(const char* p
)
687 // NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
688 if (c
== '\xC3') // UTF-8 escape character
689 ++p
; //TODO only continue on umlaut characters
690 else if (!isalnum(c
) && c
!='.' && c
!='-' && c
!='_' && c
!=':')
698 void DocType::parse(const char* p
)
700 while(isspace((unsigned char)*p
)) ++p
;
702 const char* start
= p
;
703 p
= get_xmlsym_end_utf8(p
);
704 _name
.assign(start
, p
-start
);
706 while(isspace((unsigned char)*p
)) ++p
;
709 p
= get_xmlsym_end_utf8(p
);
710 std::string
keyword(p
, p
-start
); // "PUBLIC" or "SYSTEM"
712 while(isspace((unsigned char)*p
)) ++p
;
714 if (*p
=='"' || *p
=='\'') {
718 while(*p
&& *p
!=delim
) ++p
;
721 _public
.assign(start
, p
++-start
);
725 while(isspace((unsigned char)*p
)) ++p
;
727 if (*p
=='"' || *p
=='\'') {
731 while(*p
&& *p
!=delim
) ++p
;
734 _system
.assign(start
, p
++-start
);
740 void XMLFormat::print_header(std::ostream
& out
, bool lf
) const
742 out
<< "<?xml version=\"" << _version
<< "\" encoding=\"" << _encoding
<< "\"";
744 if (_standalone
!= -1)
745 out
<< " standalone=\"yes\"";
752 if (!_doctype
.empty()) {
753 out
<< "<!DOCTYPE " << _doctype
._name
;
755 if (!_doctype
._public
.empty()) {
756 out
<< " PUBLIC \"" << _doctype
._public
<< '"';
761 out
<< " \"" << _doctype
._system
<< '"';
762 } else if (!_doctype
._system
.empty())
763 out
<< " SYSTEM \"" << _doctype
._system
<< '"';
771 for(StyleSheetList::const_iterator it
=_stylesheets
.begin(); it
!=_stylesheets
.end(); ++it
) {
778 /* if (!_additional.empty()) {
786 void StyleSheet::print(std::ostream
& out
) const
788 out
<< "<?xml-stylesheet"
789 " href=\"" << _href
<< "\""
790 " type=\"" << _type
<< "\"";
793 out
<< " title=\"" << _title
<< "\"";
796 out
<< " media=\"" << _media
<< "\"";
798 if (!_charset
.empty())
799 out
<< " charset=\"" << _charset
<< "\"";
802 out
<< " alternate=\"yes\"";
808 /// return formatted error message
809 std::string
XMLError::str() const
811 std::ostringstream out
;
819 /// return merged error strings
820 XS_String
XMLErrorList::str() const
822 std::ostringstream out
;
824 for(const_iterator it
=begin(); it
!=end(); ++it
)
825 out
<< *it
<< std::endl
;
831 void XMLReaderBase::finish_read()
833 if (_pos
->_children
.empty())
834 _pos
->_trailing
.append(_content
);
836 _pos
->_children
.back()->_trailing
.append(_content
);
842 /// store XML version and encoding into XML reader
843 void XMLReaderBase::XmlDeclHandler(const char* version
, const char* encoding
, int standalone
)
846 _format
._version
= version
;
849 _format
._encoding
= encoding
;
851 _format
._standalone
= standalone
;
855 /// notifications about XML start tag
856 void XMLReaderBase::StartElementHandler(const XS_String
& name
, const XMLNode::AttributeMap
& attributes
)
858 const char* s
= _content
.c_str();
859 const char* e
= s
+ _content
.length();
862 // search for content end leaving only white space for leading
864 if (!isspace((unsigned char)p
[-1]))
868 if (_pos
->_children
.empty()) { // no children in last node?
869 if (_last_tag
== TAG_START
)
870 _pos
->_content
.append(s
, p
-s
);
871 else if (_last_tag
== TAG_END
)
872 _pos
->_trailing
.append(s
, p
-s
);
873 else // TAG_NONE at root node
876 _pos
->_children
.back()->_trailing
.append(s
, p
-s
);
882 leading
.assign(p
, e
-p
);
884 XMLNode
* node
= new XMLNode(name
, leading
);
888 #ifdef XMLNODE_LOCATION
889 node
->_location
= get_location();
892 node
->_attributes
= attributes
;
894 _last_tag
= TAG_START
;
898 /// notifications about XML end tag
899 void XMLReaderBase::EndElementHandler()
901 const char* s
= _content
.c_str();
902 const char* e
= s
+ _content
.length();
905 if (!strncmp(s
,CDATA_START
,9) && !strncmp(e
-3,CDATA_END
,3)) {
909 _pos
->_cdata_content
= true;
911 // search for content end leaving only white space for _end_leading
913 if (!isspace((unsigned char)p
[-1]))
916 _pos
->_cdata_content
= false;
920 if (_pos
->_children
.empty()) // no children in current node?
921 _pos
->_content
.append(s
, p
-s
);
922 else if (_last_tag
== TAG_START
)
923 _pos
->_content
.append(s
, p
-s
);
925 _pos
->_children
.back()->_trailing
.append(s
, p
-s
);
929 _pos
->_end_leading
.assign(p
, e
-p
);
937 #if defined(XS_USE_XERCES) || defined(XS_USE_EXPAT)
938 /// store content, white space and comments
939 void XMLReaderBase::DefaultHandler(const XML_Char
* s
, int len
)
941 #if defined(XML_UNICODE) || defined(XS_USE_XERCES)
942 _content
.append(String_from_XML_Char(s
, len
));
944 _content
.append(s
, len
);
950 XS_String
XMLWriter::s_empty_attr
;
952 void XMLWriter::create(const XS_String
& name
)
954 if (!_stack
.empty()) {
955 StackEntry
& last
= _stack
.top();
957 if (last
._state
< PRE_CLOSED
) {
958 write_attributes(last
);
966 entry
._node_name
= name
;
972 bool XMLWriter::back()
974 if (!_stack
.empty()) {
975 write_post(_stack
.top());
983 void XMLWriter::close_pre(StackEntry
& entry
)
987 entry
._state
= PRE_CLOSED
;
990 void XMLWriter::write_pre(StackEntry
& entry
)
992 if (_format
._pretty
>= PRETTY_LINEFEED
)
993 _out
<< _format
._endl
;
995 if (_format
._pretty
== PRETTY_INDENT
) {
996 for(size_t i
=_stack
.size(); --i
>0; )
997 _out
<< XML_INDENT_SPACE
;
1000 _out
<< '<' << EncodeXMLString(entry
._node_name
);
1001 //entry._state = PRE;
1004 void XMLWriter::write_attributes(StackEntry
& entry
)
1006 for(AttrMap::const_iterator it
=entry
._attributes
.begin(); it
!=entry
._attributes
.end(); ++it
)
1007 _out
<< ' ' << EncodeXMLString(it
->first
) << "=\"" << EncodeXMLString(it
->second
) << "\"";
1009 entry
._state
= ATTRIBUTES
;
1012 void XMLWriter::write_post(StackEntry
& entry
)
1014 if (entry
._state
< ATTRIBUTES
)
1015 write_attributes(entry
);
1017 if (entry
._children
|| !entry
._content
.empty()) {
1018 if (entry
._state
< PRE_CLOSED
)
1021 _out
<< entry
._content
;
1022 //entry._state = CONTENT;
1024 if (_format
._pretty
>=PRETTY_LINEFEED
&& entry
._content
.empty())
1025 _out
<< _format
._endl
;
1027 if (_format
._pretty
==PRETTY_INDENT
&& entry
._content
.empty()) {
1028 for(size_t i
=_stack
.size(); --i
>0; )
1029 _out
<< XML_INDENT_SPACE
;
1032 _out
<< "</" << EncodeXMLString(entry
._node_name
) << ">";
1037 entry
._state
= POST
;
1041 } // namespace XMLStorage