2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
31 #include "xmllite_private.h"
36 #include "wine/debug.h"
37 #include "wine/list.h"
39 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
41 /* not defined in public headers */
42 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
46 XmlReadInState_Initial
,
47 XmlReadInState_XmlDecl
,
48 XmlReadInState_Misc_DTD
,
50 XmlReadInState_DTD_Misc
,
51 XmlReadInState_Element
,
52 XmlReadInState_Content
,
53 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
55 } XmlReaderInternalState
;
57 /* This state denotes where parsing was interrupted by input problem.
58 Reader resumes parsing using this information. */
61 XmlReadResumeState_Initial
,
62 XmlReadResumeState_PITarget
,
63 XmlReadResumeState_PIBody
,
64 XmlReadResumeState_CDATA
,
65 XmlReadResumeState_Comment
,
66 XmlReadResumeState_STag
,
67 XmlReadResumeState_CharData
,
68 XmlReadResumeState_Whitespace
69 } XmlReaderResumeState
;
71 /* saved pointer index to resume from particular input position */
74 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
75 XmlReadResume_Local
, /* local for QName */
76 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
82 StringValue_LocalName
,
84 StringValue_QualifiedName
,
87 } XmlReaderStringValue
;
89 static const WCHAR usasciiW
[] = {'U','S','-','A','S','C','I','I',0};
90 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
91 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
93 static const WCHAR dblquoteW
[] = {'\"',0};
94 static const WCHAR quoteW
[] = {'\'',0};
95 static const WCHAR ltW
[] = {'<',0};
96 static const WCHAR gtW
[] = {'>',0};
97 static const WCHAR commentW
[] = {'<','!','-','-',0};
98 static const WCHAR piW
[] = {'<','?',0};
100 BOOL
is_namestartchar(WCHAR ch
);
102 static const char *debugstr_nodetype(XmlNodeType nodetype
)
104 static const char * const type_names
[] =
113 "ProcessingInstruction",
126 if (nodetype
> _XmlNodeType_Last
)
127 return wine_dbg_sprintf("unknown type=%d", nodetype
);
129 return type_names
[nodetype
];
132 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
134 static const char * const prop_names
[] =
146 if (prop
> _XmlReaderProperty_Last
)
147 return wine_dbg_sprintf("unknown property=%d", prop
);
149 return prop_names
[prop
];
152 struct xml_encoding_data
159 static const struct xml_encoding_data xml_encoding_map
[] = {
160 { usasciiW
, XmlEncoding_USASCII
, 20127 },
161 { utf16W
, XmlEncoding_UTF16
, 1200 },
162 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
},
165 const WCHAR
*get_encoding_name(xml_encoding encoding
)
167 return xml_encoding_map
[encoding
].name
;
170 xml_encoding
get_encoding_from_codepage(UINT codepage
)
173 for (i
= 0; i
< ARRAY_SIZE(xml_encoding_map
); i
++)
175 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
177 return XmlEncoding_Unknown
;
184 unsigned int allocated
;
185 unsigned int written
;
189 typedef struct input_buffer input_buffer
;
193 IXmlReaderInput IXmlReaderInput_iface
;
195 /* reference passed on IXmlReaderInput creation, is kept when input is created */
198 xml_encoding encoding
;
201 /* stream reference set after SetInput() call from reader,
202 stored as sequential stream, cause currently
203 optimizations possible with IStream aren't implemented */
204 ISequentialStream
*stream
;
205 input_buffer
*buffer
;
206 unsigned int pending
: 1;
209 static const struct IUnknownVtbl xmlreaderinputvtbl
;
211 /* Structure to hold parsed string of specific length.
213 Reader stores node value as 'start' pointer, on request
214 a null-terminated version of it is allocated.
216 To init a strval variable use reader_init_strval(),
217 to set strval as a reader value use reader_set_strval().
221 WCHAR
*str
; /* allocated null-terminated string */
222 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
223 UINT start
; /* input position where value starts */
226 static WCHAR emptyW
[] = {0};
227 static WCHAR xmlW
[] = {'x','m','l',0};
228 static WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
229 static const strval strval_empty
= { emptyW
};
230 static const strval strval_xml
= { xmlW
, 3 };
231 static const strval strval_xmlns
= { xmlnsW
, 5 };
233 struct reader_position
241 ATTRIBUTE_NS_DEFINITION
= 0x1,
242 ATTRIBUTE_DEFAULT_NS_DEFINITION
= 0x2,
252 struct reader_position position
;
262 struct reader_position position
;
270 struct element
*element
;
275 IXmlReader IXmlReader_iface
;
277 xmlreaderinput
*input
;
280 HRESULT error
; /* error set on XmlReadState_Error */
281 XmlReaderInternalState instate
;
282 XmlReaderResumeState resumestate
;
283 XmlNodeType nodetype
;
284 DtdProcessing dtdmode
;
285 IXmlResolver
*resolver
;
287 struct reader_position position
;
288 struct list attrs
; /* attributes list for current node */
289 struct attribute
*attr
; /* current attribute */
293 struct list elements
;
295 strval strvalues
[StringValue_Last
];
298 BOOL is_empty_element
;
299 struct element empty_element
; /* used for empty elements without end tag <a />,
300 and to keep <?xml reader position */
301 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
306 encoded_buffer utf16
;
307 encoded_buffer encoded
;
309 xmlreaderinput
*input
;
312 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
314 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
317 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
319 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
322 /* reader memory allocation functions */
323 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
325 return m_alloc(reader
->imalloc
, len
);
328 static inline void *reader_alloc_zero(xmlreader
*reader
, size_t len
)
330 void *ret
= reader_alloc(reader
, len
);
336 static inline void reader_free(xmlreader
*reader
, void *mem
)
338 m_free(reader
->imalloc
, mem
);
341 /* Just return pointer from offset, no attempt to read more. */
342 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
344 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
345 return (WCHAR
*)buffer
->data
+ offset
;
348 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
350 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
353 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
357 if (src
->str
!= strval_empty
.str
)
359 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
360 if (!dest
->str
) return E_OUTOFMEMORY
;
361 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
362 dest
->str
[dest
->len
] = 0;
369 /* reader input memory allocation functions */
370 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
372 return m_alloc(input
->imalloc
, len
);
375 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
377 return m_realloc(input
->imalloc
, mem
, len
);
380 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
382 m_free(input
->imalloc
, mem
);
385 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
392 size
= (lstrlenW(str
)+1)*sizeof(WCHAR
);
393 ret
= readerinput_alloc(input
, size
);
394 if (ret
) memcpy(ret
, str
, size
);
400 /* This one frees stored string value if needed */
401 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
403 if (v
->str
!= strval_empty
.str
)
405 reader_free(reader
, v
->str
);
410 static void reader_clear_attrs(xmlreader
*reader
)
412 struct attribute
*attr
, *attr2
;
413 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
415 reader_free_strvalued(reader
, &attr
->localname
);
416 reader_free_strvalued(reader
, &attr
->value
);
417 reader_free(reader
, attr
);
419 list_init(&reader
->attrs
);
420 reader
->attr_count
= 0;
424 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
425 while we are on a node with attributes */
426 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*prefix
, strval
*localname
, strval
*qname
,
427 strval
*value
, const struct reader_position
*position
, unsigned int flags
)
429 struct attribute
*attr
;
432 attr
= reader_alloc(reader
, sizeof(*attr
));
433 if (!attr
) return E_OUTOFMEMORY
;
435 hr
= reader_strvaldup(reader
, localname
, &attr
->localname
);
438 hr
= reader_strvaldup(reader
, value
, &attr
->value
);
440 reader_free_strvalued(reader
, &attr
->value
);
444 reader_free(reader
, attr
);
449 attr
->prefix
= *prefix
;
451 memset(&attr
->prefix
, 0, sizeof(attr
->prefix
));
452 attr
->qname
= qname
? *qname
: *localname
;
453 attr
->position
= *position
;
455 list_add_tail(&reader
->attrs
, &attr
->entry
);
456 reader
->attr_count
++;
461 /* Returns current element, doesn't check if reader is actually positioned on it. */
462 static struct element
*reader_get_element(xmlreader
*reader
)
464 if (reader
->is_empty_element
)
465 return &reader
->empty_element
;
467 return LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
470 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
477 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
479 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
482 /* used to initialize from constant string */
483 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
490 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
492 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
495 static void reader_free_strvalues(xmlreader
*reader
)
498 for (type
= 0; type
< StringValue_Last
; type
++)
499 reader_free_strvalue(reader
, type
);
502 /* This helper should only be used to test if strings are the same,
503 it doesn't try to sort. */
504 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
506 if (str1
->len
!= str2
->len
) return 0;
507 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
510 static void reader_clear_elements(xmlreader
*reader
)
512 struct element
*elem
, *elem2
;
513 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
515 reader_free_strvalued(reader
, &elem
->prefix
);
516 reader_free_strvalued(reader
, &elem
->localname
);
517 reader_free_strvalued(reader
, &elem
->qname
);
518 reader_free(reader
, elem
);
520 list_init(&reader
->elements
);
521 reader_free_strvalued(reader
, &reader
->empty_element
.localname
);
522 reader_free_strvalued(reader
, &reader
->empty_element
.qname
);
523 reader
->is_empty_element
= FALSE
;
526 static struct ns
*reader_lookup_ns(xmlreader
*reader
, const strval
*prefix
)
528 struct list
*nslist
= prefix
? &reader
->ns
: &reader
->nsdef
;
531 LIST_FOR_EACH_ENTRY_REV(ns
, nslist
, struct ns
, entry
) {
532 if (strval_eq(reader
, prefix
, &ns
->prefix
))
539 static HRESULT
reader_inc_depth(xmlreader
*reader
)
541 return (++reader
->depth
>= reader
->max_depth
&& reader
->max_depth
) ? SC_E_MAXELEMENTDEPTH
: S_OK
;
544 static void reader_dec_depth(xmlreader
*reader
)
550 static HRESULT
reader_push_ns(xmlreader
*reader
, const strval
*prefix
, const strval
*uri
, BOOL def
)
555 ns
= reader_alloc(reader
, sizeof(*ns
));
556 if (!ns
) return E_OUTOFMEMORY
;
559 memset(&ns
->prefix
, 0, sizeof(ns
->prefix
));
561 hr
= reader_strvaldup(reader
, prefix
, &ns
->prefix
);
563 reader_free(reader
, ns
);
568 hr
= reader_strvaldup(reader
, uri
, &ns
->uri
);
570 reader_free_strvalued(reader
, &ns
->prefix
);
571 reader_free(reader
, ns
);
576 list_add_head(def
? &reader
->nsdef
: &reader
->ns
, &ns
->entry
);
580 static void reader_free_element(xmlreader
*reader
, struct element
*element
)
582 reader_free_strvalued(reader
, &element
->prefix
);
583 reader_free_strvalued(reader
, &element
->localname
);
584 reader_free_strvalued(reader
, &element
->qname
);
585 reader_free(reader
, element
);
588 static void reader_mark_ns_nodes(xmlreader
*reader
, struct element
*element
)
592 LIST_FOR_EACH_ENTRY(ns
, &reader
->ns
, struct ns
, entry
) {
595 ns
->element
= element
;
598 LIST_FOR_EACH_ENTRY(ns
, &reader
->nsdef
, struct ns
, entry
) {
601 ns
->element
= element
;
605 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*prefix
, strval
*localname
,
606 strval
*qname
, const struct reader_position
*position
)
608 struct element
*element
;
611 element
= reader_alloc_zero(reader
, sizeof(*element
));
613 return E_OUTOFMEMORY
;
615 if ((hr
= reader_strvaldup(reader
, prefix
, &element
->prefix
)) == S_OK
&&
616 (hr
= reader_strvaldup(reader
, localname
, &element
->localname
)) == S_OK
&&
617 (hr
= reader_strvaldup(reader
, qname
, &element
->qname
)) == S_OK
)
619 list_add_head(&reader
->elements
, &element
->entry
);
620 reader_mark_ns_nodes(reader
, element
);
621 reader
->is_empty_element
= FALSE
;
622 element
->position
= *position
;
625 reader_free_element(reader
, element
);
630 static void reader_pop_ns_nodes(xmlreader
*reader
, struct element
*element
)
634 LIST_FOR_EACH_ENTRY_SAFE_REV(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
635 if (ns
->element
!= element
)
638 list_remove(&ns
->entry
);
639 reader_free_strvalued(reader
, &ns
->prefix
);
640 reader_free_strvalued(reader
, &ns
->uri
);
641 reader_free(reader
, ns
);
644 if (!list_empty(&reader
->nsdef
)) {
645 ns
= LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
646 if (ns
->element
== element
) {
647 list_remove(&ns
->entry
);
648 reader_free_strvalued(reader
, &ns
->prefix
);
649 reader_free_strvalued(reader
, &ns
->uri
);
650 reader_free(reader
, ns
);
655 static void reader_pop_element(xmlreader
*reader
)
657 struct element
*element
;
659 if (list_empty(&reader
->elements
))
662 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
663 list_remove(&element
->entry
);
665 reader_pop_ns_nodes(reader
, element
);
666 reader_free_element(reader
, element
);
668 /* It was a root element, the rest is expected as Misc */
669 if (list_empty(&reader
->elements
))
670 reader
->instate
= XmlReadInState_MiscEnd
;
673 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
674 means node value is to be determined. */
675 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
677 strval
*v
= &reader
->strvalues
[type
];
679 reader_free_strvalue(reader
, type
);
688 if (value
->str
== strval_empty
.str
)
692 if (type
== StringValue_Value
)
694 /* defer allocation for value string */
696 v
->start
= value
->start
;
701 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
702 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
703 v
->str
[value
->len
] = 0;
709 static inline int is_reader_pending(xmlreader
*reader
)
711 return reader
->input
->pending
;
714 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
716 const int initial_len
= 0x2000;
717 buffer
->data
= readerinput_alloc(input
, initial_len
);
718 if (!buffer
->data
) return E_OUTOFMEMORY
;
720 memset(buffer
->data
, 0, 4);
722 buffer
->allocated
= initial_len
;
724 buffer
->prev_cr
= FALSE
;
729 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
731 readerinput_free(input
, buffer
->data
);
734 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
736 if (encoding
== XmlEncoding_Unknown
)
738 FIXME("unsupported encoding %d\n", encoding
);
742 *cp
= xml_encoding_map
[encoding
].cp
;
747 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
751 if (!name
) return XmlEncoding_Unknown
;
754 max
= ARRAY_SIZE(xml_encoding_map
) - 1;
761 c
= _wcsnicmp(xml_encoding_map
[n
].name
, name
, len
);
763 c
= wcsicmp(xml_encoding_map
[n
].name
, name
);
765 return xml_encoding_map
[n
].enc
;
773 return XmlEncoding_Unknown
;
776 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
778 input_buffer
*buffer
;
781 input
->buffer
= NULL
;
783 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
784 if (!buffer
) return E_OUTOFMEMORY
;
786 buffer
->input
= input
;
787 buffer
->code_page
= ~0; /* code page is unknown at this point */
788 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
790 readerinput_free(input
, buffer
);
794 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
796 free_encoded_buffer(input
, &buffer
->utf16
);
797 readerinput_free(input
, buffer
);
801 input
->buffer
= buffer
;
805 static void free_input_buffer(input_buffer
*buffer
)
807 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
808 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
809 readerinput_free(buffer
->input
, buffer
);
812 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
814 if (readerinput
->stream
) {
815 ISequentialStream_Release(readerinput
->stream
);
816 readerinput
->stream
= NULL
;
820 /* Queries already stored interface for IStream/ISequentialStream.
821 Interface supplied on creation will be overwritten */
822 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
826 readerinput_release_stream(readerinput
);
827 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
829 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
834 /* reads a chunk to raw buffer */
835 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
837 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
838 /* to make sure aligned length won't exceed allocated length */
839 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
843 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
844 variable width encodings like UTF-8 */
845 len
= (len
+ 3) & ~3;
846 /* try to use allocated space or grow */
847 if (buffer
->allocated
- buffer
->written
< len
)
849 buffer
->allocated
*= 2;
850 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
851 len
= buffer
->allocated
- buffer
->written
;
855 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
856 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
857 readerinput
->pending
= hr
== E_PENDING
;
858 if (FAILED(hr
)) return hr
;
859 buffer
->written
+= read
;
864 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
865 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
867 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
869 length
*= sizeof(WCHAR
);
870 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
871 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
873 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
874 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
875 buffer
->allocated
= grown_size
;
879 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
881 static const char startA
[] = {'<','?'};
882 static const char commentA
[] = {'<','!'};
883 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
884 unsigned char *ptr
= (unsigned char*)buffer
->data
;
886 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
887 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
888 /* test start byte */
891 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
892 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
893 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
894 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
898 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
900 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
901 static const char utf8bom
[] = {0xef,0xbb,0xbf};
902 static const char utf16lebom
[] = {0xff,0xfe};
905 *enc
= XmlEncoding_Unknown
;
907 if (buffer
->written
<= 3)
909 HRESULT hr
= readerinput_growraw(readerinput
);
910 if (FAILED(hr
)) return hr
;
911 if (buffer
->written
< 3) return MX_E_INPUTEND
;
914 ptrW
= (WCHAR
*)buffer
->data
;
915 /* try start symbols if we have enough data to do that, input buffer should contain
916 first chunk already */
917 if (readerinput_is_utf8(readerinput
))
918 *enc
= XmlEncoding_UTF8
;
919 else if (*ptrW
== '<')
922 if (*ptrW
== '?' || *ptrW
== '!' || is_namestartchar(*ptrW
))
923 *enc
= XmlEncoding_UTF16
;
925 /* try with BOM now */
926 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
928 buffer
->cur
+= sizeof(utf8bom
);
929 *enc
= XmlEncoding_UTF8
;
931 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
933 buffer
->cur
+= sizeof(utf16lebom
);
934 *enc
= XmlEncoding_UTF16
;
940 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
942 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
943 int len
= buffer
->written
;
945 /* complete single byte char */
946 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
948 /* find start byte of multibyte char */
949 while (--len
&& !(buffer
->data
[len
] & 0xc0))
955 /* Returns byte length of complete char sequence for buffer code page,
956 it's relative to current buffer position which is currently used for BOM handling
958 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
960 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
963 if (readerinput
->buffer
->code_page
== CP_UTF8
)
964 len
= readerinput_get_utf8_convlen(readerinput
);
966 len
= buffer
->written
;
968 TRACE("%d\n", len
- buffer
->cur
);
969 return len
- buffer
->cur
;
972 /* It's possible that raw buffer has some leftovers from last conversion - some char
973 sequence that doesn't represent a full code point. Length argument should be calculated with
974 readerinput_get_convlen(), if it's -1 it will be calculated here. */
975 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
977 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
980 len
= readerinput_get_convlen(readerinput
);
982 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
983 /* everything below cur is lost too */
984 buffer
->written
-= len
+ buffer
->cur
;
985 /* after this point we don't need cur offset really,
986 it's used only to mark where actual data begins when first chunk is read */
990 static void fixup_buffer_cr(encoded_buffer
*buffer
, int off
)
992 BOOL prev_cr
= buffer
->prev_cr
;
996 src
= dest
= (WCHAR
*)buffer
->data
+ off
;
997 while ((const char*)src
< buffer
->data
+ buffer
->written
)
1006 if(prev_cr
&& *src
== '\n')
1013 buffer
->written
= (char*)dest
- buffer
->data
;
1014 buffer
->prev_cr
= prev_cr
;
1018 /* note that raw buffer content is kept */
1019 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
1021 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1022 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1028 hr
= get_code_page(enc
, &cp
);
1029 if (FAILED(hr
)) return;
1031 readerinput
->buffer
->code_page
= cp
;
1032 len
= readerinput_get_convlen(readerinput
);
1034 TRACE("switching to cp %d\n", cp
);
1036 /* just copy in this case */
1037 if (enc
== XmlEncoding_UTF16
)
1039 readerinput_grow(readerinput
, len
);
1040 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
1041 dest
->written
+= len
*sizeof(WCHAR
);
1045 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1046 readerinput_grow(readerinput
, dest_len
);
1047 ptr
= (WCHAR
*)dest
->data
;
1048 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1050 dest
->written
+= dest_len
*sizeof(WCHAR
);
1053 fixup_buffer_cr(dest
, 0);
1056 /* shrinks parsed data a buffer begins with */
1057 static void reader_shrink(xmlreader
*reader
)
1059 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1061 /* avoid to move too often using threshold shrink length */
1062 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
1064 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
1065 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
1067 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
1071 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1072 It won't attempt to shrink but will grow destination buffer if needed */
1073 static HRESULT
reader_more(xmlreader
*reader
)
1075 xmlreaderinput
*readerinput
= reader
->input
;
1076 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1077 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1078 UINT cp
= readerinput
->buffer
->code_page
;
1079 int len
, dest_len
, prev_len
;
1083 /* get some raw data from stream first */
1084 hr
= readerinput_growraw(readerinput
);
1085 len
= readerinput_get_convlen(readerinput
);
1086 prev_len
= dest
->written
/ sizeof(WCHAR
);
1088 /* just copy for UTF-16 case */
1091 readerinput_grow(readerinput
, len
);
1092 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
1093 dest
->written
+= len
*sizeof(WCHAR
);
1097 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1098 readerinput_grow(readerinput
, dest_len
);
1099 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
1100 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1102 dest
->written
+= dest_len
*sizeof(WCHAR
);
1103 /* get rid of processed data */
1104 readerinput_shrinkraw(readerinput
, len
);
1107 fixup_buffer_cr(dest
, prev_len
);
1111 static inline UINT
reader_get_cur(xmlreader
*reader
)
1113 return reader
->input
->buffer
->utf16
.cur
;
1116 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
1118 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1119 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
1120 if (!*ptr
) reader_more(reader
);
1121 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
1124 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
1127 const WCHAR
*ptr
= reader_get_ptr(reader
);
1132 reader_more(reader
);
1133 ptr
= reader_get_ptr(reader
);
1135 if (str
[i
] != ptr
[i
])
1136 return ptr
[i
] - str
[i
];
1142 static void reader_update_position(xmlreader
*reader
, WCHAR ch
)
1145 reader
->position
.line_position
= 1;
1146 else if (ch
== '\n')
1148 reader
->position
.line_number
++;
1149 reader
->position
.line_position
= 1;
1152 reader
->position
.line_position
++;
1155 /* moves cursor n WCHARs forward */
1156 static void reader_skipn(xmlreader
*reader
, int n
)
1158 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1161 while (*(ptr
= reader_get_ptr(reader
)) && n
--)
1163 reader_update_position(reader
, *ptr
);
1168 static inline BOOL
is_wchar_space(WCHAR ch
)
1170 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
1173 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1174 static int reader_skipspaces(xmlreader
*reader
)
1176 const WCHAR
*ptr
= reader_get_ptr(reader
);
1177 UINT start
= reader_get_cur(reader
);
1179 while (is_wchar_space(*ptr
))
1181 reader_skipn(reader
, 1);
1182 ptr
= reader_get_ptr(reader
);
1185 return reader_get_cur(reader
) - start
;
1188 /* [26] VersionNum ::= '1.' [0-9]+ */
1189 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
1191 static const WCHAR onedotW
[] = {'1','.',0};
1195 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
1197 start
= reader_get_cur(reader
);
1199 reader_skipn(reader
, 2);
1201 ptr2
= ptr
= reader_get_ptr(reader
);
1202 while (*ptr
>= '0' && *ptr
<= '9')
1204 reader_skipn(reader
, 1);
1205 ptr
= reader_get_ptr(reader
);
1208 if (ptr2
== ptr
) return WC_E_DIGIT
;
1209 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1210 TRACE("version=%s\n", debug_strval(reader
, val
));
1214 /* [25] Eq ::= S? '=' S? */
1215 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1217 static const WCHAR eqW
[] = {'=',0};
1218 reader_skipspaces(reader
);
1219 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1221 reader_skipn(reader
, 1);
1222 reader_skipspaces(reader
);
1226 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1227 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1229 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1230 struct reader_position position
;
1234 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1236 position
= reader
->position
;
1237 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1238 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1239 /* skip 'version' */
1240 reader_skipn(reader
, 7);
1242 hr
= reader_parse_eq(reader
);
1243 if (FAILED(hr
)) return hr
;
1245 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1248 reader_skipn(reader
, 1);
1250 hr
= reader_parse_versionnum(reader
, &val
);
1251 if (FAILED(hr
)) return hr
;
1253 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1257 reader_skipn(reader
, 1);
1259 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1262 /* ([A-Za-z0-9._] | '-') */
1263 static inline BOOL
is_wchar_encname(WCHAR ch
)
1265 return ((ch
>= 'A' && ch
<= 'Z') ||
1266 (ch
>= 'a' && ch
<= 'z') ||
1267 (ch
>= '0' && ch
<= '9') ||
1268 (ch
== '.') || (ch
== '_') ||
1272 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1273 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1275 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1279 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1280 return WC_E_ENCNAME
;
1282 val
->start
= reader_get_cur(reader
);
1285 while (is_wchar_encname(*++ptr
))
1289 enc
= parse_encoding_name(start
, len
);
1290 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1294 if (enc
== XmlEncoding_Unknown
)
1295 return WC_E_ENCNAME
;
1297 /* skip encoding name */
1298 reader_skipn(reader
, len
);
1302 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1303 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1305 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1306 struct reader_position position
;
1310 if (!reader_skipspaces(reader
)) return S_FALSE
;
1312 position
= reader
->position
;
1313 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1314 name
.str
= reader_get_ptr(reader
);
1315 name
.start
= reader_get_cur(reader
);
1317 /* skip 'encoding' */
1318 reader_skipn(reader
, 8);
1320 hr
= reader_parse_eq(reader
);
1321 if (FAILED(hr
)) return hr
;
1323 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1326 reader_skipn(reader
, 1);
1328 hr
= reader_parse_encname(reader
, &val
);
1329 if (FAILED(hr
)) return hr
;
1331 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1335 reader_skipn(reader
, 1);
1337 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1340 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1341 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1343 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1344 static const WCHAR yesW
[] = {'y','e','s',0};
1345 static const WCHAR noW
[] = {'n','o',0};
1346 struct reader_position position
;
1351 if (!reader_skipspaces(reader
)) return S_FALSE
;
1353 position
= reader
->position
;
1354 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1355 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1356 /* skip 'standalone' */
1357 reader_skipn(reader
, 10);
1359 hr
= reader_parse_eq(reader
);
1360 if (FAILED(hr
)) return hr
;
1362 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1365 reader_skipn(reader
, 1);
1367 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1368 return WC_E_XMLDECL
;
1370 start
= reader_get_cur(reader
);
1371 /* skip 'yes'|'no' */
1372 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1373 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1374 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1376 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1379 reader_skipn(reader
, 1);
1381 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1384 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1385 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1387 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1388 static const WCHAR declcloseW
[] = {'?','>',0};
1389 struct reader_position position
;
1392 /* check if we have "<?xml " */
1393 if (reader_cmp(reader
, xmldeclW
))
1396 reader_skipn(reader
, 2);
1397 position
= reader
->position
;
1398 reader_skipn(reader
, 3);
1399 hr
= reader_parse_versioninfo(reader
);
1403 hr
= reader_parse_encdecl(reader
);
1407 hr
= reader_parse_sddecl(reader
);
1411 reader_skipspaces(reader
);
1412 if (reader_cmp(reader
, declcloseW
))
1413 return WC_E_XMLDECL
;
1416 reader_skipn(reader
, 2);
1418 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1419 reader
->empty_element
.position
= position
;
1420 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_xml
);
1421 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_xml
);
1426 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1427 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1432 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1434 start
= reader
->resume
[XmlReadResume_Body
];
1435 ptr
= reader_get_ptr(reader
);
1440 reader_skipn(reader
, 4);
1441 reader_shrink(reader
);
1442 ptr
= reader_get_ptr(reader
);
1443 start
= reader_get_cur(reader
);
1444 reader
->nodetype
= XmlNodeType_Comment
;
1445 reader
->resume
[XmlReadResume_Body
] = start
;
1446 reader
->resumestate
= XmlReadResumeState_Comment
;
1447 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1450 /* will exit when there's no more data, it won't attempt to
1451 read more from stream */
1462 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1463 TRACE("%s\n", debug_strval(reader
, &value
));
1465 /* skip rest of markup '->' */
1466 reader_skipn(reader
, 3);
1468 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1469 reader
->resume
[XmlReadResume_Body
] = 0;
1470 reader
->resumestate
= XmlReadResumeState_Initial
;
1474 return WC_E_COMMENT
;
1478 reader_skipn(reader
, 1);
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
/* Tests a single WCHAR against the Char production. Both surrogate
   ranges are accepted so that the [#x10000-#x10FFFF] part of the
   production can pass; pairing of surrogates is not validated here.
   Returns nonzero when ch is allowed, zero otherwise. */
static inline BOOL is_char(WCHAR ch)
{
    return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
           (ch >= 0x20 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xe000 && ch <= 0xfffd);
}
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
/* Tests whether ch is allowed inside a public identifier literal.
   Returns nonzero when ch is allowed, zero otherwise.
   The punctuation run starts at the apostrophe (0x27) so that ' ( ) * + ,
   are accepted as the production requires; callers that parse an
   apostrophe-delimited literal must still stop at the closing quote
   themselves. */
BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           (ch >= '\'' && ch <= ';') || /* '()*+,-./:; (0x27-0x3b, digits included) */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
/* Tests whether ch may start a Name (production [4] NameStartChar).
   Both surrogate ranges are accepted in place of [#x10000-#xEFFFF];
   pairing of surrogates is not validated here.
   Returns nonzero when ch is allowed, zero otherwise. */
BOOL is_namestartchar(WCHAR ch)
{
    return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*) */
/* Tests whether ch may appear in an NCName: every NameChar of production
   [4a] except the colon. Both surrogate ranges are accepted in place of
   [#x10000-#xEFFFF]; pairing of surrogates is not validated here.
   Returns nonzero when ch is allowed, zero otherwise. */
BOOL is_ncnamechar(WCHAR ch)
{
    return (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch == '-') || (ch == '.') ||
           (ch >= '0' && ch <= '9') ||
           (ch == 0xb7)                   || /* #xB7 from NameChar [4a] */
           (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x300  && ch <= 0x36f)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
/* Tests whether ch may appear in a Name: a colon or anything accepted by
   is_ncnamechar(). Returns nonzero when ch is allowed, zero otherwise. */
BOOL is_namechar(WCHAR ch)
{
    return (ch == ':') || is_ncnamechar(ch);
}
1558 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1560 /* When we're on attribute always return attribute type, container node type is kept.
1561 Note that container is not necessarily an element, and attribute doesn't mean it's
1562 an attribute in XML spec terms. */
1563 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1566 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1567 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1568 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1569 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1570 [5] Name ::= NameStartChar (NameChar)* */
1571 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1576 if (reader
->resume
[XmlReadResume_Name
])
1578 start
= reader
->resume
[XmlReadResume_Name
];
1579 ptr
= reader_get_ptr(reader
);
1583 ptr
= reader_get_ptr(reader
);
1584 start
= reader_get_cur(reader
);
1585 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1588 while (is_namechar(*ptr
))
1590 reader_skipn(reader
, 1);
1591 ptr
= reader_get_ptr(reader
);
1594 if (is_reader_pending(reader
))
1596 reader
->resume
[XmlReadResume_Name
] = start
;
1600 reader
->resume
[XmlReadResume_Name
] = 0;
1602 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1603 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1608 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1609 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1611 static const WCHAR xmlW
[] = {'x','m','l'};
1612 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1618 hr
= reader_parse_name(reader
, &name
);
1619 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1621 /* now that we got name check for illegal content */
1622 if (strval_eq(reader
, &name
, &xmlval
))
1623 return WC_E_LEADINGXML
;
1625 /* PITarget can't be a qualified name */
1626 ptr
= reader_get_strptr(reader
, &name
);
1627 for (i
= 0; i
< name
.len
; i
++)
1629 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1631 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1636 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1637 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1644 switch (reader
->resumestate
)
1646 case XmlReadResumeState_Initial
:
1648 reader_skipn(reader
, 2);
1649 reader_shrink(reader
);
1650 reader
->resumestate
= XmlReadResumeState_PITarget
;
1651 case XmlReadResumeState_PITarget
:
1652 hr
= reader_parse_pitarget(reader
, &target
);
1653 if (FAILED(hr
)) return hr
;
1654 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1655 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1656 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1657 reader
->resumestate
= XmlReadResumeState_PIBody
;
1658 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1663 start
= reader
->resume
[XmlReadResume_Body
];
1664 ptr
= reader_get_ptr(reader
);
1671 UINT cur
= reader_get_cur(reader
);
1674 /* strip all leading whitespace chars */
1677 ptr
= reader_get_ptr2(reader
, start
);
1678 if (!is_wchar_space(*ptr
)) break;
1682 reader_init_strvalue(start
, cur
-start
, &value
);
1685 reader_skipn(reader
, 2);
1686 TRACE("%s\n", debug_strval(reader
, &value
));
1687 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1688 reader
->resumestate
= XmlReadResumeState_Initial
;
1689 reader
->resume
[XmlReadResume_Body
] = 0;
1690 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1695 reader_skipn(reader
, 1);
1696 ptr
= reader_get_ptr(reader
);
1702 /* This one is used to parse significant whitespace nodes, like in Misc production */
1703 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1705 switch (reader
->resumestate
)
1707 case XmlReadResumeState_Initial
:
1708 reader_shrink(reader
);
1709 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1710 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1711 reader
->nodetype
= XmlNodeType_Whitespace
;
1712 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1713 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1714 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1716 case XmlReadResumeState_Whitespace
:
1721 reader_skipspaces(reader
);
1722 if (is_reader_pending(reader
)) return S_OK
;
1724 start
= reader
->resume
[XmlReadResume_Body
];
1725 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1726 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1727 TRACE("%s\n", debug_strval(reader
, &value
));
1728 reader
->resumestate
= XmlReadResumeState_Initial
;
1737 /* [27] Misc ::= Comment | PI | S */
/* Parses Misc items at the current position.
   When an earlier call left a resume state other than Initial, more input
   is requested through reader_more() and the parser that was in progress
   (PI, comment or whitespace) is re-entered; its result is returned.
   Otherwise the current input selects the parser: a whitespace character,
   a match against commentW, or a match against piW.
   Any result other than S_FALSE is returned to the caller; hr starts out
   as S_FALSE, so that value stands for no Misc item having been found. */
1738 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1740 HRESULT hr
= S_FALSE
;
1742 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1744 hr
= reader_more(reader
);
1745 if (FAILED(hr
)) return hr
;
1747 /* finish current node */
1748 switch (reader
->resumestate
)
/* both PI resume states are handled by the same parser */
1750 case XmlReadResumeState_PITarget
:
1751 case XmlReadResumeState_PIBody
:
1752 return reader_parse_pi(reader
);
1753 case XmlReadResumeState_Comment
:
1754 return reader_parse_comment(reader
);
1755 case XmlReadResumeState_Whitespace
:
1756 return reader_parse_whitespace(reader
);
1758 ERR("unknown resume state %d\n", reader
->resumestate
);
1764 const WCHAR
*cur
= reader_get_ptr(reader
);
/* dispatch on what the input starts with */
1766 if (is_wchar_space(*cur
))
1767 hr
= reader_parse_whitespace(reader
);
1768 else if (!reader_cmp(reader
, commentW
))
1769 hr
= reader_parse_comment(reader
);
1770 else if (!reader_cmp(reader
, piW
))
1771 hr
= reader_parse_pi(reader
);
1775 if (hr
!= S_FALSE
) return hr
;
1781 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1782 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1784 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1787 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1790 reader_skipn(reader
, 1);
1792 cur
= reader_get_ptr(reader
);
1793 start
= reader_get_cur(reader
);
1794 while (is_char(*cur
) && *cur
!= quote
)
1796 reader_skipn(reader
, 1);
1797 cur
= reader_get_ptr(reader
);
1799 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1800 if (*cur
== quote
) reader_skipn(reader
, 1);
1802 TRACE("%s\n", debug_strval(reader
, literal
));
1806 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1807 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1808 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1810 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1813 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1816 reader_skipn(reader
, 1);
1818 start
= reader_get_cur(reader
);
1819 cur
= reader_get_ptr(reader
);
1820 while (is_pubchar(*cur
) && *cur
!= quote
)
1822 reader_skipn(reader
, 1);
1823 cur
= reader_get_ptr(reader
);
1825 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1826 if (*cur
== quote
) reader_skipn(reader
, 1);
1828 TRACE("%s\n", debug_strval(reader
, literal
));
1832 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1833 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1835 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1836 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1837 struct reader_position position
= reader
->position
;
1842 if (!reader_cmp(reader
, publicW
)) {
1846 reader_skipn(reader
, 6);
1847 cnt
= reader_skipspaces(reader
);
1848 if (!cnt
) return WC_E_WHITESPACE
;
1850 hr
= reader_parse_pub_literal(reader
, &pub
);
1851 if (FAILED(hr
)) return hr
;
1853 reader_init_cstrvalue(publicW
, lstrlenW(publicW
), &name
);
1854 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &pub
, &position
, 0);
1855 if (FAILED(hr
)) return hr
;
1857 cnt
= reader_skipspaces(reader
);
1858 if (!cnt
) return S_OK
;
1860 /* optional system id */
1861 hr
= reader_parse_sys_literal(reader
, &sys
);
1862 if (FAILED(hr
)) return S_OK
;
1864 reader_init_cstrvalue(systemW
, lstrlenW(systemW
), &name
);
1865 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1866 if (FAILED(hr
)) return hr
;
1869 } else if (!reader_cmp(reader
, systemW
)) {
1871 reader_skipn(reader
, 6);
1872 cnt
= reader_skipspaces(reader
);
1873 if (!cnt
) return WC_E_WHITESPACE
;
1875 hr
= reader_parse_sys_literal(reader
, &sys
);
1876 if (FAILED(hr
)) return hr
;
1878 reader_init_cstrvalue(systemW
, lstrlenW(systemW
), &name
);
1879 return reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1885 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1886 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1888 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1893 /* check if we have "<!DOCTYPE" */
1894 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1895 reader_shrink(reader
);
1897 /* DTD processing is not allowed by default */
1898 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1900 reader_skipn(reader
, 9);
1901 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1904 hr
= reader_parse_name(reader
, &name
);
1905 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1907 reader_skipspaces(reader
);
1909 hr
= reader_parse_externalid(reader
);
1910 if (FAILED(hr
)) return hr
;
1912 reader_skipspaces(reader
);
1914 cur
= reader_get_ptr(reader
);
1917 FIXME("internal subset parsing not implemented\n");
1922 reader_skipn(reader
, 1);
1924 reader
->nodetype
= XmlNodeType_DocumentType
;
1925 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1926 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1931 /* [11 NS] LocalPart ::= NCName */
1932 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
, BOOL check_for_separator
)
1937 if (reader
->resume
[XmlReadResume_Local
])
1939 start
= reader
->resume
[XmlReadResume_Local
];
1940 ptr
= reader_get_ptr(reader
);
1944 ptr
= reader_get_ptr(reader
);
1945 start
= reader_get_cur(reader
);
1948 while (is_ncnamechar(*ptr
))
1950 reader_skipn(reader
, 1);
1951 ptr
= reader_get_ptr(reader
);
1954 if (check_for_separator
&& *ptr
== ':')
1955 return NC_E_QNAMECOLON
;
1957 if (is_reader_pending(reader
))
1959 reader
->resume
[XmlReadResume_Local
] = start
;
1963 reader
->resume
[XmlReadResume_Local
] = 0;
1965 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1970 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1971 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1972 [9 NS] UnprefixedName ::= LocalPart
1973 [10 NS] Prefix ::= NCName */
1974 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1980 if (reader
->resume
[XmlReadResume_Name
])
1982 start
= reader
->resume
[XmlReadResume_Name
];
1983 ptr
= reader_get_ptr(reader
);
1987 ptr
= reader_get_ptr(reader
);
1988 start
= reader_get_cur(reader
);
1989 reader
->resume
[XmlReadResume_Name
] = start
;
1990 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1993 if (reader
->resume
[XmlReadResume_Local
])
1995 hr
= reader_parse_local(reader
, local
, FALSE
);
1996 if (FAILED(hr
)) return hr
;
1998 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1999 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
2004 /* skip prefix part */
2005 while (is_ncnamechar(*ptr
))
2007 reader_skipn(reader
, 1);
2008 ptr
= reader_get_ptr(reader
);
2011 if (is_reader_pending(reader
)) return E_PENDING
;
2013 /* got a qualified name */
2016 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
2019 reader_skipn(reader
, 1);
2020 hr
= reader_parse_local(reader
, local
, TRUE
);
2021 if (FAILED(hr
)) return hr
;
2025 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
2026 reader_init_strvalue(0, 0, prefix
);
2031 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
2033 TRACE("ncname %s\n", debug_strval(reader
, local
));
2035 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
2037 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
2040 reader
->resume
[XmlReadResume_Name
] = 0;
2041 reader
->resume
[XmlReadResume_Local
] = 0;
2046 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
2048 static const WCHAR entltW
[] = {'l','t'};
2049 static const WCHAR entgtW
[] = {'g','t'};
2050 static const WCHAR entampW
[] = {'a','m','p'};
2051 static const WCHAR entaposW
[] = {'a','p','o','s'};
2052 static const WCHAR entquotW
[] = {'q','u','o','t'};
2053 static const strval lt
= { (WCHAR
*)entltW
, 2 };
2054 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
2055 static const strval amp
= { (WCHAR
*)entampW
, 3 };
2056 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
2057 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
2058 WCHAR
*str
= reader_get_strptr(reader
, name
);
2063 if (strval_eq(reader
, name
, <
)) return '<';
2066 if (strval_eq(reader
, name
, >
)) return '>';
2069 if (strval_eq(reader
, name
, &
))
2071 else if (strval_eq(reader
, name
, &apos
))
2075 if (strval_eq(reader
, name
, "
)) return '\"';
2084 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2085 [67] Reference ::= EntityRef | CharRef
2086 [68] EntityRef ::= '&' Name ';' */
2087 static HRESULT
reader_parse_reference(xmlreader
*reader
)
2089 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
2090 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
2091 UINT cur
= reader_get_cur(reader
);
2096 reader_skipn(reader
, 1);
2097 ptr
= reader_get_ptr(reader
);
2101 reader_skipn(reader
, 1);
2102 ptr
= reader_get_ptr(reader
);
2104 /* hex char or decimal */
2107 reader_skipn(reader
, 1);
2108 ptr
= reader_get_ptr(reader
);
2112 if ((*ptr
>= '0' && *ptr
<= '9'))
2113 ch
= ch
*16 + *ptr
- '0';
2114 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
2115 ch
= ch
*16 + *ptr
- 'a' + 10;
2116 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
2117 ch
= ch
*16 + *ptr
- 'A' + 10;
2119 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
2120 reader_skipn(reader
, 1);
2121 ptr
= reader_get_ptr(reader
);
2128 if ((*ptr
>= '0' && *ptr
<= '9'))
2130 ch
= ch
*10 + *ptr
- '0';
2131 reader_skipn(reader
, 1);
2132 ptr
= reader_get_ptr(reader
);
2135 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
2139 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
2142 if (is_wchar_space(ch
)) ch
= ' ';
2144 ptr
= reader_get_ptr(reader
);
2145 start
= reader_get_ptr2(reader
, cur
);
2146 len
= buffer
->written
- ((char *)ptr
- buffer
->data
);
2147 memmove(start
+ 1, ptr
+ 1, len
);
2149 buffer
->written
-= (reader_get_cur(reader
) - cur
) * sizeof(WCHAR
);
2150 buffer
->cur
= cur
+ 1;
2159 hr
= reader_parse_name(reader
, &name
);
2160 if (FAILED(hr
)) return hr
;
2162 ptr
= reader_get_ptr(reader
);
2163 if (*ptr
!= ';') return WC_E_SEMICOLON
;
2165 /* predefined entities resolve to a single character */
2166 ch
= get_predefined_entity(reader
, &name
);
2169 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
2170 memmove(start
+1, ptr
+1, len
);
2171 buffer
->cur
= cur
+ 1;
2172 buffer
->written
-= (ptr
- start
) * sizeof(WCHAR
);
2178 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
2179 return WC_E_UNDECLAREDENTITY
;
2187 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2188 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
2193 ptr
= reader_get_ptr(reader
);
2195 /* skip opening quote */
2197 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
2198 reader_skipn(reader
, 1);
2200 ptr
= reader_get_ptr(reader
);
2201 start
= reader_get_cur(reader
);
2204 if (*ptr
== '<') return WC_E_LESSTHAN
;
2208 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2209 /* skip closing quote */
2210 reader_skipn(reader
, 1);
2216 HRESULT hr
= reader_parse_reference(reader
);
2217 if (FAILED(hr
)) return hr
;
2221 /* replace all whitespace chars with ' ' */
2222 if (is_wchar_space(*ptr
)) *ptr
= ' ';
2223 reader_skipn(reader
, 1);
2225 ptr
= reader_get_ptr(reader
);
2231 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2232 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2233 [3 NS] DefaultAttName ::= 'xmlns'
2234 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
/* Parses one attribute: qualified name, equals sign, quoted value.
   A failure from reader_parse_qname(), reader_parse_eq() or
   reader_parse_attvalue() is returned unchanged.
   An attribute whose prefix equals strval_xmlns is flagged as a namespace
   definition, one whose whole qualified name equals strval_xmlns as a
   default namespace definition; either kind is also registered through
   reader_push_ns(). The return value is that of reader_add_attr(). */
2235 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
/* position is captured before the name is consumed */
2237 struct reader_position position
= reader
->position
;
2238 strval prefix
, local
, qname
, value
;
2239 enum attribute_flags flags
= 0;
2242 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2243 if (FAILED(hr
)) return hr
;
2245 if (strval_eq(reader
, &prefix
, &strval_xmlns
))
2246 flags
|= ATTRIBUTE_NS_DEFINITION
;
2248 if (strval_eq(reader
, &qname
, &strval_xmlns
))
2249 flags
|= ATTRIBUTE_DEFAULT_NS_DEFINITION
;
2251 hr
= reader_parse_eq(reader
);
2252 if (FAILED(hr
)) return hr
;
2254 hr
= reader_parse_attvalue(reader
, &value
);
2255 if (FAILED(hr
)) return hr
;
/* the last argument tells reader_push_ns() whether this is the default
   namespace; NOTE(review): its return value is not checked here */
2257 if (flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
2258 reader_push_ns(reader
, &local
, &value
, !!(flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
));
2260 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2261 return reader_add_attr(reader
, &prefix
, &local
, &qname
, &value
, &position
, flags
);
2264 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2265 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2266 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
2268 struct reader_position position
= reader
->position
;
2271 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2272 if (FAILED(hr
)) return hr
;
2276 static const WCHAR endW
[] = {'/','>',0};
2278 reader_skipspaces(reader
);
2281 if ((reader
->is_empty_element
= !reader_cmp(reader
, endW
)))
2283 struct element
*element
= &reader
->empty_element
;
2286 reader_skipn(reader
, 2);
2288 reader_free_strvalued(reader
, &element
->qname
);
2289 reader_free_strvalued(reader
, &element
->localname
);
2291 element
->prefix
= *prefix
;
2292 reader_strvaldup(reader
, qname
, &element
->qname
);
2293 reader_strvaldup(reader
, local
, &element
->localname
);
2294 element
->position
= position
;
2295 reader_mark_ns_nodes(reader
, element
);
2299 /* got a start tag */
2300 if (!reader_cmp(reader
, gtW
))
2303 reader_skipn(reader
, 1);
2304 return reader_push_element(reader
, prefix
, local
, qname
, &position
);
2307 hr
= reader_parse_attribute(reader
);
2308 if (FAILED(hr
)) return hr
;
2314 /* [39] element ::= EmptyElemTag | STag content ETag */
2315 static HRESULT
reader_parse_element(xmlreader
*reader
)
2319 switch (reader
->resumestate
)
2321 case XmlReadResumeState_Initial
:
2322 /* check if we are really on element */
2323 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2326 reader_skipn(reader
, 1);
2328 reader_shrink(reader
);
2329 reader
->resumestate
= XmlReadResumeState_STag
;
2330 case XmlReadResumeState_STag
:
2332 strval qname
, prefix
, local
;
2334 /* this handles empty elements too */
2335 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
);
2336 if (FAILED(hr
)) return hr
;
2338 /* FIXME: need to check for defined namespace to reject invalid prefix */
2340 /* if we got empty element and stack is empty go straight to Misc */
2341 if (reader
->is_empty_element
&& list_empty(&reader
->elements
))
2342 reader
->instate
= XmlReadInState_MiscEnd
;
2344 reader
->instate
= XmlReadInState_Content
;
2346 reader
->nodetype
= XmlNodeType_Element
;
2347 reader
->resumestate
= XmlReadResumeState_Initial
;
2348 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2349 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2350 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
2360 /* [13 NS] ETag ::= '</' QName S? '>' */
/* Parses an end tag and checks it against the element on top of the
   reader element list.
   Skips the two opening characters, parses the qualified name, skips
   optional whitespace and requires a match against gtW.
   Returns the failure from reader_parse_qname() unchanged,
   WC_E_GREATERTHAN when the closing character is missing, and
   WC_E_ELEMENTMATCH when the qualified name differs from the one stored
   for the head element. Otherwise the node type becomes
   XmlNodeType_EndElement and the prefix string value is set. */
2361 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2363 struct reader_position position
;
2364 strval prefix
, local
, qname
;
2365 struct element
*element
;
2369 reader_skipn(reader
, 2);
/* position right after the opening characters, i.e. where the name starts */
2371 position
= reader
->position
;
2372 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2373 if (FAILED(hr
)) return hr
;
2375 reader_skipspaces(reader
);
2377 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2380 reader_skipn(reader
, 1);
2382 /* Element stack should never be empty at this point, cause we shouldn't get to
2383 content parsing if it's empty. */
2384 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2385 if (!strval_eq(reader
, &element
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2387 /* update position stored for start tag, we won't be using it */
2388 element
->position
= position
;
2390 reader
->nodetype
= XmlNodeType_EndElement
;
2391 reader
->is_empty_element
= FALSE
;
2392 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2397 /* [18] CDSect ::= CDStart CData CDEnd
2398 [19] CDStart ::= '<![CDATA['
2399 [20] CData ::= (Char* - (Char* ']]>' Char*))
2400 [21] CDEnd ::= ']]>' */
2401 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2406 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2408 start
= reader
->resume
[XmlReadResume_Body
];
2409 ptr
= reader_get_ptr(reader
);
2413 /* skip markup '<![CDATA[' */
2414 reader_skipn(reader
, 9);
2415 reader_shrink(reader
);
2416 ptr
= reader_get_ptr(reader
);
2417 start
= reader_get_cur(reader
);
2418 reader
->nodetype
= XmlNodeType_CDATA
;
2419 reader
->resume
[XmlReadResume_Body
] = start
;
2420 reader
->resumestate
= XmlReadResumeState_CDATA
;
2421 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2426 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2430 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2433 reader_skipn(reader
, 3);
2434 TRACE("%s\n", debug_strval(reader
, &value
));
2436 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2437 reader
->resume
[XmlReadResume_Body
] = 0;
2438 reader
->resumestate
= XmlReadResumeState_Initial
;
2443 reader_skipn(reader
, 1);
2451 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2452 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2454 struct reader_position position
;
2458 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2460 start
= reader
->resume
[XmlReadResume_Body
];
2461 ptr
= reader_get_ptr(reader
);
2465 reader_shrink(reader
);
2466 ptr
= reader_get_ptr(reader
);
2467 start
= reader_get_cur(reader
);
2468 /* There's no text */
2469 if (!*ptr
|| *ptr
== '<') return S_OK
;
2470 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2471 reader
->resume
[XmlReadResume_Body
] = start
;
2472 reader
->resumestate
= XmlReadResumeState_CharData
;
2473 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2476 position
= reader
->position
;
2479 static const WCHAR ampW
[] = {'&',0};
2481 /* CDATA closing sequence ']]>' is not allowed */
2482 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2483 return WC_E_CDSECTEND
;
2485 /* Found next markup part */
2490 reader
->empty_element
.position
= position
;
2491 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2492 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2493 reader
->resume
[XmlReadResume_Body
] = 0;
2494 reader
->resumestate
= XmlReadResumeState_Initial
;
2498 /* this covers a case when text has leading whitespace chars */
2499 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2501 if (!reader_cmp(reader
, ampW
))
2502 reader_parse_reference(reader
);
2504 reader_skipn(reader
, 1);
2506 ptr
= reader_get_ptr(reader
);
2512 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
/* Parses the next piece of element content and returns the result of the
   parser it dispatches to.
   When a resume state other than Initial is set, the interrupted parser
   (CDATA, comment, PI or character data) is re-entered.
   Otherwise reader_shrink() is called and the input is compared, in this
   order, against the end tag opener, commentW, piW, the CDATA opener and
   ltW; the first match selects the parser. Input matching none of them is
   handed to reader_parse_chardata(). */
2513 static HRESULT
reader_parse_content(xmlreader
*reader
)
2515 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2516 static const WCHAR etagW
[] = {'<','/',0};
2518 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2520 switch (reader
->resumestate
)
2522 case XmlReadResumeState_CDATA
:
2523 return reader_parse_cdata(reader
);
2524 case XmlReadResumeState_Comment
:
2525 return reader_parse_comment(reader
);
/* both PI resume states are handled by the same parser */
2526 case XmlReadResumeState_PIBody
:
2527 case XmlReadResumeState_PITarget
:
2528 return reader_parse_pi(reader
);
2529 case XmlReadResumeState_CharData
:
2530 return reader_parse_chardata(reader
);
2532 ERR("unknown resume state %d\n", reader
->resumestate
);
2536 reader_shrink(reader
);
2538 /* handle end tag here, it indicates end of content as well */
2539 if (!reader_cmp(reader
, etagW
))
2540 return reader_parse_endtag(reader
);
2542 if (!reader_cmp(reader
, commentW
))
2543 return reader_parse_comment(reader
);
2545 if (!reader_cmp(reader
, piW
))
2546 return reader_parse_pi(reader
);
2548 if (!reader_cmp(reader
, cdstartW
))
2549 return reader_parse_cdata(reader
);
/* NOTE(review): ltW is defined outside this function, presumably the bare
   less-than sign; if so this test has to stay after the longer markup
   openers above - confirm */
2551 if (!reader_cmp(reader
, ltW
))
2552 return reader_parse_element(reader
);
2554 /* what's left must be CharData */
2555 return reader_parse_chardata(reader
);
2558 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2560 XmlNodeType nodetype
= reader_get_nodetype(reader
);
2563 if (!is_reader_pending(reader
))
2565 reader
->chunk_read_off
= 0;
2566 reader_clear_attrs(reader
);
2569 /* When moving from EndElement or empty element, pop its own namespace definitions */
2572 case XmlNodeType_Attribute
:
2573 reader_dec_depth(reader
);
2575 case XmlNodeType_Element
:
2576 if (reader
->is_empty_element
)
2577 reader_pop_ns_nodes(reader
, &reader
->empty_element
);
2578 else if (FAILED(hr
= reader_inc_depth(reader
)))
2581 case XmlNodeType_EndElement
:
2582 reader_pop_element(reader
);
2583 reader_dec_depth(reader
);
2591 switch (reader
->instate
)
2593 /* if it's a first call for a new input we need to detect stream encoding */
2594 case XmlReadInState_Initial
:
2598 hr
= readerinput_growraw(reader
->input
);
2599 if (FAILED(hr
)) return hr
;
2601 reader
->position
.line_number
= 1;
2602 reader
->position
.line_position
= 1;
2604 /* try to detect encoding by BOM or data and set input code page */
2605 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2606 TRACE("detected encoding %s, 0x%08x\n", enc
== XmlEncoding_Unknown
? "(unknown)" :
2607 debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2608 if (FAILED(hr
)) return hr
;
2610 /* always switch first time cause we have to put something in */
2611 readerinput_switchencoding(reader
->input
, enc
);
2613 /* parse xml declaration */
2614 hr
= reader_parse_xmldecl(reader
);
2615 if (FAILED(hr
)) return hr
;
2617 readerinput_shrinkraw(reader
->input
, -1);
2618 reader
->instate
= XmlReadInState_Misc_DTD
;
2619 if (hr
== S_OK
) return hr
;
2622 case XmlReadInState_Misc_DTD
:
2623 hr
= reader_parse_misc(reader
);
2624 if (FAILED(hr
)) return hr
;
2627 reader
->instate
= XmlReadInState_DTD
;
2631 case XmlReadInState_DTD
:
2632 hr
= reader_parse_dtd(reader
);
2633 if (FAILED(hr
)) return hr
;
2637 reader
->instate
= XmlReadInState_DTD_Misc
;
2641 reader
->instate
= XmlReadInState_Element
;
2643 case XmlReadInState_DTD_Misc
:
2644 hr
= reader_parse_misc(reader
);
2645 if (FAILED(hr
)) return hr
;
2648 reader
->instate
= XmlReadInState_Element
;
2652 case XmlReadInState_Element
:
2653 return reader_parse_element(reader
);
2654 case XmlReadInState_Content
:
2655 return reader_parse_content(reader
);
2656 case XmlReadInState_MiscEnd
:
2657 hr
= reader_parse_misc(reader
);
2658 if (hr
!= S_FALSE
) return hr
;
2660 if (*reader_get_ptr(reader
))
2662 WARN("found garbage in the end of XML\n");
2666 reader
->instate
= XmlReadInState_Eof
;
2667 reader
->state
= XmlReadState_EndOfFile
;
2668 reader
->nodetype
= XmlNodeType_None
;
2670 case XmlReadInState_Eof
:
2673 FIXME("internal state %d not handled\n", reader
->instate
);
2681 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2683 xmlreader
*This
= impl_from_IXmlReader(iface
);
2685 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2687 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2688 IsEqualGUID(riid
, &IID_IXmlReader
))
2694 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2696 return E_NOINTERFACE
;
2699 IXmlReader_AddRef(iface
);
2704 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2706 xmlreader
*This
= impl_from_IXmlReader(iface
);
2707 ULONG ref
= InterlockedIncrement(&This
->ref
);
2708 TRACE("(%p)->(%d)\n", This
, ref
);
2712 static void reader_clear_ns(xmlreader
*reader
)
2714 struct ns
*ns
, *ns2
;
2716 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
2717 list_remove(&ns
->entry
);
2718 reader_free_strvalued(reader
, &ns
->prefix
);
2719 reader_free_strvalued(reader
, &ns
->uri
);
2720 reader_free(reader
, ns
);
2723 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->nsdef
, struct ns
, entry
) {
2724 list_remove(&ns
->entry
);
2725 reader_free_strvalued(reader
, &ns
->uri
);
2726 reader_free(reader
, ns
);
2730 static void reader_reset_parser(xmlreader
*reader
)
2732 reader
->position
.line_number
= 0;
2733 reader
->position
.line_position
= 0;
2735 reader_clear_elements(reader
);
2736 reader_clear_attrs(reader
);
2737 reader_clear_ns(reader
);
2738 reader_free_strvalues(reader
);
2741 reader
->nodetype
= XmlNodeType_None
;
2742 reader
->resumestate
= XmlReadResumeState_Initial
;
2743 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2744 reader
->is_empty_element
= FALSE
;
2747 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2749 xmlreader
*This
= impl_from_IXmlReader(iface
);
2750 LONG ref
= InterlockedDecrement(&This
->ref
);
2752 TRACE("(%p)->(%d)\n", This
, ref
);
2756 IMalloc
*imalloc
= This
->imalloc
;
2757 reader_reset_parser(This
);
2758 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2759 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2760 if (This
->mlang
) IUnknown_Release(This
->mlang
);
2761 reader_free(This
, This
);
2762 if (imalloc
) IMalloc_Release(imalloc
);
2768 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2770 xmlreader
*This
= impl_from_IXmlReader(iface
);
2771 IXmlReaderInput
*readerinput
;
2774 TRACE("(%p)->(%p)\n", This
, input
);
2778 readerinput_release_stream(This
->input
);
2779 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2783 reader_reset_parser(This
);
2785 /* just reset current input */
2788 This
->state
= XmlReadState_Initial
;
2792 /* now try IXmlReaderInput, ISequentialStream, IStream */
2793 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2796 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2797 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2800 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2801 readerinput
, readerinput
->lpVtbl
);
2802 IUnknown_Release(readerinput
);
2808 if (hr
!= S_OK
|| !readerinput
)
2810 /* create IXmlReaderInput basing on supplied interface */
2811 hr
= CreateXmlReaderInputWithEncodingName(input
,
2812 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2813 if (hr
!= S_OK
) return hr
;
2814 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2817 /* set stream for supplied IXmlReaderInput */
2818 hr
= readerinput_query_for_stream(This
->input
);
2821 This
->state
= XmlReadState_Initial
;
2822 This
->instate
= XmlReadInState_Initial
;
2827 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2829 xmlreader
*This
= impl_from_IXmlReader(iface
);
2831 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2833 if (!value
) return E_INVALIDARG
;
2837 case XmlReaderProperty_MultiLanguage
:
2838 *value
= (LONG_PTR
)This
->mlang
;
2840 IUnknown_AddRef(This
->mlang
);
2842 case XmlReaderProperty_XmlResolver
:
2843 *value
= (LONG_PTR
)This
->resolver
;
2845 IXmlResolver_AddRef(This
->resolver
);
2847 case XmlReaderProperty_DtdProcessing
:
2848 *value
= This
->dtdmode
;
2850 case XmlReaderProperty_ReadState
:
2851 *value
= This
->state
;
2853 case XmlReaderProperty_MaxElementDepth
:
2854 *value
= This
->max_depth
;
2857 FIXME("Unimplemented property (%u)\n", property
);
2864 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2866 xmlreader
*This
= impl_from_IXmlReader(iface
);
2868 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2872 case XmlReaderProperty_MultiLanguage
:
2874 IUnknown_Release(This
->mlang
);
2875 This
->mlang
= (IUnknown
*)value
;
2877 IUnknown_AddRef(This
->mlang
);
2879 FIXME("Ignoring MultiLanguage %p\n", This
->mlang
);
2881 case XmlReaderProperty_XmlResolver
:
2883 IXmlResolver_Release(This
->resolver
);
2884 This
->resolver
= (IXmlResolver
*)value
;
2886 IXmlResolver_AddRef(This
->resolver
);
2888 case XmlReaderProperty_DtdProcessing
:
2889 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2890 This
->dtdmode
= value
;
2892 case XmlReaderProperty_MaxElementDepth
:
2893 This
->max_depth
= value
;
2896 FIXME("Unimplemented property (%u)\n", property
);
2903 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2905 xmlreader
*This
= impl_from_IXmlReader(iface
);
2906 XmlNodeType oldtype
= This
->nodetype
;
2910 TRACE("(%p)->(%p)\n", This
, nodetype
);
2915 switch (This
->state
)
2917 case XmlReadState_Closed
:
2920 case XmlReadState_Error
:
2924 hr
= reader_parse_nextnode(This
);
2925 if (SUCCEEDED(hr
) && oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2926 This
->state
= XmlReadState_Interactive
;
2930 This
->state
= XmlReadState_Error
;
2931 This
->nodetype
= XmlNodeType_None
;
2937 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2938 *nodetype
= This
->nodetype
;
2943 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2945 xmlreader
*This
= impl_from_IXmlReader(iface
);
2947 TRACE("(%p)->(%p)\n", This
, node_type
);
2950 return E_INVALIDARG
;
2952 *node_type
= reader_get_nodetype(This
);
2953 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2956 static void reader_set_current_attribute(xmlreader
*reader
, struct attribute
*attr
)
2958 reader
->attr
= attr
;
2959 reader
->chunk_read_off
= 0;
2960 reader_set_strvalue(reader
, StringValue_Prefix
, &attr
->prefix
);
2961 reader_set_strvalue(reader
, StringValue_QualifiedName
, &attr
->qname
);
2962 reader_set_strvalue(reader
, StringValue_Value
, &attr
->value
);
2965 static HRESULT
reader_move_to_first_attribute(xmlreader
*reader
)
2967 if (!reader
->attr_count
)
2971 reader_inc_depth(reader
);
2973 reader_set_current_attribute(reader
, LIST_ENTRY(list_head(&reader
->attrs
), struct attribute
, entry
));
2978 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2980 xmlreader
*This
= impl_from_IXmlReader(iface
);
2982 TRACE("(%p)\n", This
);
2984 return reader_move_to_first_attribute(This
);
2987 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2989 xmlreader
*This
= impl_from_IXmlReader(iface
);
2990 const struct list
*next
;
2992 TRACE("(%p)\n", This
);
2994 if (!This
->attr_count
) return S_FALSE
;
2997 return reader_move_to_first_attribute(This
);
2999 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
3001 reader_set_current_attribute(This
, LIST_ENTRY(next
, struct attribute
, entry
));
3003 return next
? S_OK
: S_FALSE
;
3006 static void reader_get_attribute_ns_uri(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**uri
, UINT
*len
)
3008 static const WCHAR xmlns_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3009 '2','0','0','0','/','x','m','l','n','s','/',0};
3010 static const WCHAR xml_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3011 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3013 /* Check for reserved prefixes first */
3014 if ((strval_eq(reader
, &attr
->prefix
, &strval_empty
) && strval_eq(reader
, &attr
->localname
, &strval_xmlns
)) ||
3015 strval_eq(reader
, &attr
->prefix
, &strval_xmlns
))
3018 *len
= ARRAY_SIZE(xmlns_uriW
) - 1;
3020 else if (strval_eq(reader
, &attr
->prefix
, &strval_xml
))
3023 *len
= ARRAY_SIZE(xml_uriW
) - 1;
3035 if ((ns
= reader_lookup_ns(reader
, &attr
->prefix
)))
3048 static void reader_get_attribute_local_name(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**name
, UINT
*len
)
3050 if (attr
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3055 else if (attr
->flags
& ATTRIBUTE_NS_DEFINITION
)
3057 const struct ns
*ns
= reader_lookup_ns(reader
, &attr
->localname
);
3058 *name
= ns
->prefix
.str
;
3059 *len
= ns
->prefix
.len
;
3063 *name
= attr
->localname
.str
;
3064 *len
= attr
->localname
.len
;
3068 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
3069 const WCHAR
*local_name
, const WCHAR
*namespace_uri
)
3071 xmlreader
*This
= impl_from_IXmlReader(iface
);
3072 UINT target_name_len
, target_uri_len
;
3073 struct attribute
*attr
;
3075 TRACE("(%p)->(%s %s)\n", This
, debugstr_w(local_name
), debugstr_w(namespace_uri
));
3078 return E_INVALIDARG
;
3080 if (!This
->attr_count
)
3084 namespace_uri
= emptyW
;
3086 target_name_len
= lstrlenW(local_name
);
3087 target_uri_len
= lstrlenW(namespace_uri
);
3089 LIST_FOR_EACH_ENTRY(attr
, &This
->attrs
, struct attribute
, entry
)
3091 UINT name_len
, uri_len
;
3092 const WCHAR
*name
, *uri
;
3094 reader_get_attribute_local_name(This
, attr
, &name
, &name_len
);
3095 reader_get_attribute_ns_uri(This
, attr
, &uri
, &uri_len
);
3097 if (name_len
== target_name_len
&& uri_len
== target_uri_len
&&
3098 !wcscmp(name
, local_name
) && !wcscmp(uri
, namespace_uri
))
3100 reader_set_current_attribute(This
, attr
);
3108 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
3110 xmlreader
*This
= impl_from_IXmlReader(iface
);
3112 TRACE("(%p)\n", This
);
3114 if (!This
->attr_count
) return S_FALSE
;
3117 reader_dec_depth(This
);
3121 /* FIXME: support other node types with 'attributes' like DTD */
3122 if (This
->is_empty_element
) {
3123 reader_set_strvalue(This
, StringValue_Prefix
, &This
->empty_element
.prefix
);
3124 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->empty_element
.qname
);
3127 struct element
*element
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
3129 reader_set_strvalue(This
, StringValue_Prefix
, &element
->prefix
);
3130 reader_set_strvalue(This
, StringValue_QualifiedName
, &element
->qname
);
3133 This
->chunk_read_off
= 0;
3134 reader_set_strvalue(This
, StringValue_Value
, &strval_empty
);
3139 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3141 xmlreader
*This
= impl_from_IXmlReader(iface
);
3142 struct attribute
*attribute
= This
->attr
;
3143 struct element
*element
;
3146 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3151 switch (reader_get_nodetype(This
))
3153 case XmlNodeType_Text
:
3154 case XmlNodeType_CDATA
:
3155 case XmlNodeType_Comment
:
3156 case XmlNodeType_Whitespace
:
3160 case XmlNodeType_Element
:
3161 case XmlNodeType_EndElement
:
3162 element
= reader_get_element(This
);
3163 if (element
->prefix
.len
)
3165 *name
= element
->qname
.str
;
3166 *len
= element
->qname
.len
;
3170 *name
= element
->localname
.str
;
3171 *len
= element
->localname
.len
;
3174 case XmlNodeType_Attribute
:
3175 if (attribute
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3179 } else if (attribute
->prefix
.len
)
3181 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3182 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3186 *name
= attribute
->localname
.str
;
3187 *len
= attribute
->localname
.len
;
3191 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3192 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3199 static struct ns
*reader_lookup_nsdef(xmlreader
*reader
)
3201 if (list_empty(&reader
->nsdef
))
3204 return LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
3207 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
, const WCHAR
**uri
, UINT
*len
)
3209 xmlreader
*This
= impl_from_IXmlReader(iface
);
3210 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3211 XmlNodeType nodetype
;
3215 TRACE("(%p %p %p)\n", iface
, uri
, len
);
3220 switch ((nodetype
= reader_get_nodetype(This
)))
3222 case XmlNodeType_Attribute
:
3223 reader_get_attribute_ns_uri(This
, This
->attr
, uri
, len
);
3225 case XmlNodeType_Element
:
3226 case XmlNodeType_EndElement
:
3228 ns
= reader_lookup_ns(This
, prefix
);
3230 /* pick top default ns if any */
3232 ns
= reader_lookup_nsdef(This
);
3244 case XmlNodeType_Text
:
3245 case XmlNodeType_CDATA
:
3246 case XmlNodeType_ProcessingInstruction
:
3247 case XmlNodeType_Comment
:
3248 case XmlNodeType_Whitespace
:
3249 case XmlNodeType_XmlDeclaration
:
3254 FIXME("Unhandled node type %d\n", nodetype
);
3263 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3265 xmlreader
*This
= impl_from_IXmlReader(iface
);
3266 struct element
*element
;
3269 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3274 switch (reader_get_nodetype(This
))
3276 case XmlNodeType_Text
:
3277 case XmlNodeType_CDATA
:
3278 case XmlNodeType_Comment
:
3279 case XmlNodeType_Whitespace
:
3283 case XmlNodeType_Element
:
3284 case XmlNodeType_EndElement
:
3285 element
= reader_get_element(This
);
3286 *name
= element
->localname
.str
;
3287 *len
= element
->localname
.len
;
3289 case XmlNodeType_Attribute
:
3290 reader_get_attribute_local_name(This
, This
->attr
, name
, len
);
3293 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3294 *len
= This
->strvalues
[StringValue_LocalName
].len
;
3301 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, const WCHAR
**ret
, UINT
*len
)
3303 xmlreader
*This
= impl_from_IXmlReader(iface
);
3304 XmlNodeType nodetype
;
3307 TRACE("(%p)->(%p %p)\n", This
, ret
, len
);
3315 switch ((nodetype
= reader_get_nodetype(This
)))
3317 case XmlNodeType_Element
:
3318 case XmlNodeType_EndElement
:
3319 case XmlNodeType_Attribute
:
3321 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3324 if (strval_eq(This
, prefix
, &strval_xml
))
3329 else if (strval_eq(This
, prefix
, &strval_xmlns
))
3334 else if ((ns
= reader_lookup_ns(This
, prefix
)))
3336 *ret
= ns
->prefix
.str
;
3337 *len
= ns
->prefix
.len
;
3349 static const strval
*reader_get_value(xmlreader
*reader
, BOOL ensure_allocated
)
3353 switch (reader_get_nodetype(reader
))
3355 case XmlNodeType_XmlDeclaration
:
3356 case XmlNodeType_EndElement
:
3357 case XmlNodeType_None
:
3358 return &strval_empty
;
3359 case XmlNodeType_Attribute
:
3360 /* For namespace definition attributes return values from namespace list */
3361 if (reader
->attr
->flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
3365 if (!(ns
= reader_lookup_ns(reader
, &reader
->attr
->localname
)))
3366 ns
= reader_lookup_nsdef(reader
);
3370 return &reader
->attr
->value
;
3375 val
= &reader
->strvalues
[StringValue_Value
];
3376 if (!val
->str
&& ensure_allocated
)
3378 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
3379 if (!ptr
) return NULL
;
3380 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
3388 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
3390 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3391 const strval
*val
= &reader
->strvalues
[StringValue_Value
];
3394 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
3398 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
&& !val
->len
) || is_reader_pending(reader
))
3403 hr
= IXmlReader_Read(iface
, &type
);
3404 if (FAILED(hr
)) return hr
;
3406 /* return if still pending, partially read values are not reported */
3407 if (is_reader_pending(reader
)) return E_PENDING
;
3410 val
= reader_get_value(reader
, TRUE
);
3412 return E_OUTOFMEMORY
;
3414 off
= abs(reader
->chunk_read_off
);
3415 assert(off
<= val
->len
);
3416 *value
= val
->str
+ off
;
3417 if (len
) *len
= val
->len
- off
;
3418 reader
->chunk_read_off
= -off
;
3422 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
3424 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3428 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
3430 val
= reader_get_value(reader
, FALSE
);
3432 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3433 if (reader
->chunk_read_off
>= 0)
3435 assert(reader
->chunk_read_off
<= val
->len
);
3436 len
= min(val
->len
- reader
->chunk_read_off
, chunk_size
);
3438 if (read
) *read
= len
;
3442 memcpy(buffer
, reader_get_strptr(reader
, val
) + reader
->chunk_read_off
, len
*sizeof(WCHAR
));
3443 reader
->chunk_read_off
+= len
;
3446 return len
|| !chunk_size
? S_OK
: S_FALSE
;
3449 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
3451 UINT
*baseUri_length
)
3453 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
3457 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
3459 FIXME("(%p): stub\n", iface
);
3463 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
3465 xmlreader
*This
= impl_from_IXmlReader(iface
);
3466 TRACE("(%p)\n", This
);
3467 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3468 when current node is start tag of an element */
3469 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->is_empty_element
: FALSE
;
3472 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*line_number
)
3474 xmlreader
*This
= impl_from_IXmlReader(iface
);
3475 const struct element
*element
;
3477 TRACE("(%p %p)\n", This
, line_number
);
3480 return E_INVALIDARG
;
3482 switch (reader_get_nodetype(This
))
3484 case XmlNodeType_Element
:
3485 case XmlNodeType_EndElement
:
3486 element
= reader_get_element(This
);
3487 *line_number
= element
->position
.line_number
;
3489 case XmlNodeType_Attribute
:
3490 *line_number
= This
->attr
->position
.line_number
;
3492 case XmlNodeType_Whitespace
:
3493 case XmlNodeType_XmlDeclaration
:
3494 *line_number
= This
->empty_element
.position
.line_number
;
3497 *line_number
= This
->position
.line_number
;
3501 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3504 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*line_position
)
3506 xmlreader
*This
= impl_from_IXmlReader(iface
);
3507 const struct element
*element
;
3509 TRACE("(%p %p)\n", This
, line_position
);
3512 return E_INVALIDARG
;
3514 switch (reader_get_nodetype(This
))
3516 case XmlNodeType_Element
:
3517 case XmlNodeType_EndElement
:
3518 element
= reader_get_element(This
);
3519 *line_position
= element
->position
.line_position
;
3521 case XmlNodeType_Attribute
:
3522 *line_position
= This
->attr
->position
.line_position
;
3524 case XmlNodeType_Whitespace
:
3525 case XmlNodeType_XmlDeclaration
:
3526 *line_position
= This
->empty_element
.position
.line_position
;
3529 *line_position
= This
->position
.line_position
;
3533 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3536 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
3538 xmlreader
*This
= impl_from_IXmlReader(iface
);
3540 TRACE("(%p)->(%p)\n", This
, count
);
3542 if (!count
) return E_INVALIDARG
;
3544 *count
= This
->attr_count
;
3548 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
3550 xmlreader
*This
= impl_from_IXmlReader(iface
);
3551 TRACE("(%p)->(%p)\n", This
, depth
);
3552 *depth
= This
->depth
;
3556 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
3558 xmlreader
*This
= impl_from_IXmlReader(iface
);
3559 TRACE("(%p)\n", iface
);
3560 return This
->state
== XmlReadState_EndOfFile
;
3563 static const struct IXmlReaderVtbl xmlreader_vtbl
=
3565 xmlreader_QueryInterface
,
3569 xmlreader_GetProperty
,
3570 xmlreader_SetProperty
,
3572 xmlreader_GetNodeType
,
3573 xmlreader_MoveToFirstAttribute
,
3574 xmlreader_MoveToNextAttribute
,
3575 xmlreader_MoveToAttributeByName
,
3576 xmlreader_MoveToElement
,
3577 xmlreader_GetQualifiedName
,
3578 xmlreader_GetNamespaceUri
,
3579 xmlreader_GetLocalName
,
3580 xmlreader_GetPrefix
,
3582 xmlreader_ReadValueChunk
,
3583 xmlreader_GetBaseUri
,
3584 xmlreader_IsDefault
,
3585 xmlreader_IsEmptyElement
,
3586 xmlreader_GetLineNumber
,
3587 xmlreader_GetLinePosition
,
3588 xmlreader_GetAttributeCount
,
3593 /** IXmlReaderInput **/
3594 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
3596 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3598 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
3600 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
3601 IsEqualGUID(riid
, &IID_IUnknown
))
3607 WARN("interface %s not implemented\n", debugstr_guid(riid
));
3609 return E_NOINTERFACE
;
3612 IUnknown_AddRef(iface
);
3617 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
3619 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3620 ULONG ref
= InterlockedIncrement(&This
->ref
);
3621 TRACE("(%p)->(%d)\n", This
, ref
);
3625 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
3627 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3628 LONG ref
= InterlockedDecrement(&This
->ref
);
3630 TRACE("(%p)->(%d)\n", This
, ref
);
3634 IMalloc
*imalloc
= This
->imalloc
;
3635 if (This
->input
) IUnknown_Release(This
->input
);
3636 if (This
->stream
) ISequentialStream_Release(This
->stream
);
3637 if (This
->buffer
) free_input_buffer(This
->buffer
);
3638 readerinput_free(This
, This
->baseuri
);
3639 readerinput_free(This
, This
);
3640 if (imalloc
) IMalloc_Release(imalloc
);
3646 static const struct IUnknownVtbl xmlreaderinputvtbl
=
3648 xmlreaderinput_QueryInterface
,
3649 xmlreaderinput_AddRef
,
3650 xmlreaderinput_Release
3653 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
3659 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
3662 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
3664 reader
= heap_alloc(sizeof(*reader
));
3666 return E_OUTOFMEMORY
;
3668 memset(reader
, 0, sizeof(*reader
));
3669 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
3671 reader
->state
= XmlReadState_Closed
;
3672 reader
->instate
= XmlReadInState_Initial
;
3673 reader
->resumestate
= XmlReadResumeState_Initial
;
3674 reader
->dtdmode
= DtdProcessing_Prohibit
;
3675 reader
->imalloc
= imalloc
;
3676 if (imalloc
) IMalloc_AddRef(imalloc
);
3677 reader
->nodetype
= XmlNodeType_None
;
3678 list_init(&reader
->attrs
);
3679 list_init(&reader
->nsdef
);
3680 list_init(&reader
->ns
);
3681 list_init(&reader
->elements
);
3682 reader
->max_depth
= 256;
3684 reader
->chunk_read_off
= 0;
3685 for (i
= 0; i
< StringValue_Last
; i
++)
3686 reader
->strvalues
[i
] = strval_empty
;
3688 hr
= IXmlReader_QueryInterface(&reader
->IXmlReader_iface
, riid
, obj
);
3689 IXmlReader_Release(&reader
->IXmlReader_iface
);
3691 TRACE("returning iface %p, hr %#x\n", *obj
, hr
);
3696 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3701 IXmlReaderInput
**ppInput
)
3703 xmlreaderinput
*readerinput
;
3706 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3707 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3709 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3712 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3714 readerinput
= heap_alloc(sizeof(*readerinput
));
3715 if(!readerinput
) return E_OUTOFMEMORY
;
3717 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3718 readerinput
->ref
= 1;
3719 readerinput
->imalloc
= imalloc
;
3720 readerinput
->stream
= NULL
;
3721 if (imalloc
) IMalloc_AddRef(imalloc
);
3722 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3723 readerinput
->hint
= hint
;
3724 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3725 readerinput
->pending
= 0;
3727 hr
= alloc_input_buffer(readerinput
);
3730 readerinput_free(readerinput
, readerinput
->baseuri
);
3731 readerinput_free(readerinput
, readerinput
);
3732 if (imalloc
) IMalloc_Release(imalloc
);
3735 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3737 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3739 TRACE("returning iface %p\n", *ppInput
);