2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #include "xmllite_private.h"
25 #include <wine/list.h>
26 #include <wine/unicode.h>
28 /* not defined in public headers */
29 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
33 XmlReadInState_Initial
,
34 XmlReadInState_XmlDecl
,
35 XmlReadInState_Misc_DTD
,
37 XmlReadInState_DTD_Misc
,
38 XmlReadInState_Element
,
39 XmlReadInState_Content
,
40 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
42 } XmlReaderInternalState
;
44 /* This state denotes where parsing was interrupted by an input problem.
45 The reader resumes parsing using this information. */
48 XmlReadResumeState_Initial
,
49 XmlReadResumeState_PITarget
,
50 XmlReadResumeState_PIBody
,
51 XmlReadResumeState_CDATA
,
52 XmlReadResumeState_Comment
,
53 XmlReadResumeState_STag
,
54 XmlReadResumeState_CharData
,
55 XmlReadResumeState_Whitespace
56 } XmlReaderResumeState
;
58 /* saved pointer index to resume from particular input position */
61 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
62 XmlReadResume_Local
, /* local for QName */
63 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
69 StringValue_LocalName
,
71 StringValue_QualifiedName
,
74 } XmlReaderStringValue
;
76 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
77 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
79 static const WCHAR dblquoteW
[] = {'\"',0};
80 static const WCHAR quoteW
[] = {'\'',0};
81 static const WCHAR ltW
[] = {'<',0};
82 static const WCHAR gtW
[] = {'>',0};
83 static const WCHAR commentW
[] = {'<','!','-','-',0};
84 static const WCHAR piW
[] = {'<','?',0};
86 static const char *debugstr_nodetype(XmlNodeType nodetype
)
88 static const char * const type_names
[] =
97 "ProcessingInstruction",
110 if (nodetype
> _XmlNodeType_Last
)
111 return wine_dbg_sprintf("unknown type=%d", nodetype
);
113 return type_names
[nodetype
];
116 static const char *debugstr_prop(XmlReaderProperty prop
)
118 static const char * const prop_names
[] =
130 if (prop
> _XmlReaderProperty_Last
)
131 return wine_dbg_sprintf("unknown property=%d", prop
);
133 return prop_names
[prop
];
136 struct xml_encoding_data
143 static const struct xml_encoding_data xml_encoding_map
[] = {
144 { utf16W
, XmlEncoding_UTF16
, ~0 },
145 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
152 unsigned int allocated
;
153 unsigned int written
;
156 typedef struct input_buffer input_buffer
;
160 IXmlReaderInput IXmlReaderInput_iface
;
162 /* reference passed on IXmlReaderInput creation, is kept when input is created */
165 xml_encoding encoding
;
168 /* stream reference set after SetInput() call from reader,
169 stored as sequential stream, cause currently
170 optimizations possible with IStream aren't implemented */
171 ISequentialStream
*stream
;
172 input_buffer
*buffer
;
173 unsigned int pending
: 1;
176 static const struct IUnknownVtbl xmlreaderinputvtbl
;
178 /* Structure to hold parsed string of specific length.
180 Reader stores node value as 'start' pointer, on request
181 a null-terminated version of it is allocated.
183 To init a strval variable use reader_init_strval(),
184 to set strval as a reader value use reader_set_strval().
188 WCHAR
*str
; /* allocated null-terminated string */
189 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
190 UINT start
; /* input position where value starts */
193 static WCHAR emptyW
[] = {0};
194 static const strval strval_empty
= { emptyW
};
212 IXmlReader IXmlReader_iface
;
214 xmlreaderinput
*input
;
217 XmlReaderInternalState instate
;
218 XmlReaderResumeState resumestate
;
219 XmlNodeType nodetype
;
220 DtdProcessing dtdmode
;
221 UINT line
, pos
; /* reader position in XML stream */
222 struct list attrs
; /* attributes list for current node */
223 struct attribute
*attr
; /* current attribute */
225 struct list elements
;
226 strval strvalues
[StringValue_Last
];
230 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
235 encoded_buffer utf16
;
236 encoded_buffer encoded
;
238 xmlreaderinput
*input
;
241 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
243 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
246 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
248 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
251 static inline void *m_realloc(IMalloc
*imalloc
, void *mem
, size_t len
)
254 return IMalloc_Realloc(imalloc
, mem
, len
);
256 return heap_realloc(mem
, len
);
259 /* reader memory allocation functions */
260 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
262 return m_alloc(reader
->imalloc
, len
);
265 static inline void reader_free(xmlreader
*reader
, void *mem
)
267 m_free(reader
->imalloc
, mem
);
270 /* Just return pointer from offset, no attempt to read more. */
271 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
273 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
274 return (WCHAR
*)buffer
->data
+ offset
;
277 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
279 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
282 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
286 if (src
->str
!= strval_empty
.str
)
288 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
289 if (!dest
->str
) return E_OUTOFMEMORY
;
290 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
291 dest
->str
[dest
->len
] = 0;
298 /* reader input memory allocation functions */
299 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
301 return m_alloc(input
->imalloc
, len
);
304 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
306 return m_realloc(input
->imalloc
, mem
, len
);
309 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
311 m_free(input
->imalloc
, mem
);
314 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
321 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
322 ret
= readerinput_alloc(input
, size
);
323 if (ret
) memcpy(ret
, str
, size
);
329 static void reader_clear_attrs(xmlreader
*reader
)
331 struct attribute
*attr
, *attr2
;
332 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
334 reader_free(reader
, attr
);
336 list_init(&reader
->attrs
);
337 reader
->attr_count
= 0;
341 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
342 while we are on a node with attributes */
343 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*localname
, strval
*value
)
345 struct attribute
*attr
;
347 attr
= reader_alloc(reader
, sizeof(*attr
));
348 if (!attr
) return E_OUTOFMEMORY
;
350 attr
->localname
= *localname
;
351 attr
->value
= *value
;
352 list_add_tail(&reader
->attrs
, &attr
->entry
);
353 reader
->attr_count
++;
358 /* This one frees stored string value if needed */
359 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
361 if (v
->str
!= strval_empty
.str
)
363 reader_free(reader
, v
->str
);
368 /* returns length in WCHARs from 'start' to current buffer offset */
369 static inline UINT
reader_get_len(const xmlreader
*reader
, UINT start
)
371 return reader
->input
->buffer
->utf16
.cur
- start
;
374 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
381 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
383 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
386 /* used to initialize from constant string */
387 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
394 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
396 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
399 static void reader_free_strvalues(xmlreader
*reader
)
402 for (type
= 0; type
< StringValue_Last
; type
++)
403 reader_free_strvalue(reader
, type
);
406 /* This helper should only be used to test if strings are the same,
407 it doesn't try to sort. */
408 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
410 if (str1
->len
!= str2
->len
) return 0;
411 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
414 static void reader_clear_elements(xmlreader
*reader
)
416 struct element
*elem
, *elem2
;
417 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
419 reader_free_strvalued(reader
, &elem
->qname
);
420 reader_free(reader
, elem
);
422 list_init(&reader
->elements
);
423 reader
->empty_element
= FALSE
;
426 static HRESULT
reader_inc_depth(xmlreader
*reader
)
428 if (++reader
->depth
> reader
->max_depth
) return SC_E_MAXELEMENTDEPTH
;
432 static void reader_dec_depth(xmlreader
*reader
)
434 if (reader
->depth
> 1) reader
->depth
--;
437 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*qname
, strval
*localname
)
439 struct element
*elem
;
442 elem
= reader_alloc(reader
, sizeof(*elem
));
443 if (!elem
) return E_OUTOFMEMORY
;
445 hr
= reader_strvaldup(reader
, qname
, &elem
->qname
);
447 reader_free(reader
, elem
);
451 hr
= reader_strvaldup(reader
, localname
, &elem
->localname
);
454 reader_free_strvalued(reader
, &elem
->qname
);
455 reader_free(reader
, elem
);
459 if (!list_empty(&reader
->elements
))
461 hr
= reader_inc_depth(reader
);
463 reader_free(reader
, elem
);
468 list_add_head(&reader
->elements
, &elem
->entry
);
469 reader
->empty_element
= FALSE
;
473 static void reader_pop_element(xmlreader
*reader
)
475 struct element
*elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
479 list_remove(&elem
->entry
);
480 reader_free_strvalued(reader
, &elem
->qname
);
481 reader_free_strvalued(reader
, &elem
->localname
);
482 reader_free(reader
, elem
);
483 reader_dec_depth(reader
);
487 /* Always make a copy, because strings are supposed to be null-terminated. A null pointer for 'value'
488 means the node value is yet to be determined. */
489 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
491 strval
*v
= &reader
->strvalues
[type
];
493 reader_free_strvalue(reader
, type
);
502 if (value
->str
== strval_empty
.str
)
506 if (type
== StringValue_Value
)
508 /* defer allocation for value string */
510 v
->start
= value
->start
;
515 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
516 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
517 v
->str
[value
->len
] = 0;
523 static inline int is_reader_pending(xmlreader
*reader
)
525 return reader
->input
->pending
;
528 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
530 const int initial_len
= 0x2000;
531 buffer
->data
= readerinput_alloc(input
, initial_len
);
532 if (!buffer
->data
) return E_OUTOFMEMORY
;
534 memset(buffer
->data
, 0, 4);
536 buffer
->allocated
= initial_len
;
542 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
544 readerinput_free(input
, buffer
->data
);
547 static HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
549 if (encoding
== XmlEncoding_Unknown
)
551 FIXME("unsupported encoding %d\n", encoding
);
555 *cp
= xml_encoding_map
[encoding
].cp
;
560 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
564 if (!name
) return XmlEncoding_Unknown
;
567 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
574 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
576 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
578 return xml_encoding_map
[n
].enc
;
586 return XmlEncoding_Unknown
;
589 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
591 input_buffer
*buffer
;
594 input
->buffer
= NULL
;
596 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
597 if (!buffer
) return E_OUTOFMEMORY
;
599 buffer
->input
= input
;
600 buffer
->code_page
= ~0; /* code page is unknown at this point */
601 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
603 readerinput_free(input
, buffer
);
607 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
609 free_encoded_buffer(input
, &buffer
->utf16
);
610 readerinput_free(input
, buffer
);
614 input
->buffer
= buffer
;
618 static void free_input_buffer(input_buffer
*buffer
)
620 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
621 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
622 readerinput_free(buffer
->input
, buffer
);
625 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
627 if (readerinput
->stream
) {
628 ISequentialStream_Release(readerinput
->stream
);
629 readerinput
->stream
= NULL
;
633 /* Queries already stored interface for IStream/ISequentialStream.
634 Interface supplied on creation will be overwritten */
635 static HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
639 readerinput_release_stream(readerinput
);
640 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
642 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
647 /* reads a chunk to raw buffer */
648 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
650 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
651 /* to make sure aligned length won't exceed allocated length */
652 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
656 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
657 variable width encodings like UTF-8 */
658 len
= (len
+ 3) & ~3;
659 /* try to use allocated space or grow */
660 if (buffer
->allocated
- buffer
->written
< len
)
662 buffer
->allocated
*= 2;
663 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
664 len
= buffer
->allocated
- buffer
->written
;
668 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
669 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
670 readerinput
->pending
= hr
== E_PENDING
;
671 if (FAILED(hr
)) return hr
;
672 buffer
->written
+= read
;
677 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
678 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
680 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
682 length
*= sizeof(WCHAR
);
683 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
684 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
686 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
687 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
688 buffer
->allocated
= grown_size
;
692 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
694 static const char startA
[] = {'<','?'};
695 static const char commentA
[] = {'<','!'};
696 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
697 unsigned char *ptr
= (unsigned char*)buffer
->data
;
699 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
700 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
701 /* test start byte */
704 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
705 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
706 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
707 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
711 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
713 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
714 static const WCHAR startW
[] = {'<','?'};
715 static const WCHAR commentW
[] = {'<','!'};
716 static const char utf8bom
[] = {0xef,0xbb,0xbf};
717 static const char utf16lebom
[] = {0xff,0xfe};
719 *enc
= XmlEncoding_Unknown
;
721 if (buffer
->written
<= 3)
723 HRESULT hr
= readerinput_growraw(readerinput
);
724 if (FAILED(hr
)) return hr
;
725 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
728 /* try start symbols if we have enough data to do that, input buffer should contain
729 first chunk already */
730 if (readerinput_is_utf8(readerinput
))
731 *enc
= XmlEncoding_UTF8
;
732 else if (!memcmp(buffer
->data
, startW
, sizeof(startW
)) ||
733 !memcmp(buffer
->data
, commentW
, sizeof(commentW
)))
734 *enc
= XmlEncoding_UTF16
;
735 /* try with BOM now */
736 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
738 buffer
->cur
+= sizeof(utf8bom
);
739 *enc
= XmlEncoding_UTF8
;
741 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
743 buffer
->cur
+= sizeof(utf16lebom
);
744 *enc
= XmlEncoding_UTF16
;
750 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
752 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
753 int len
= buffer
->written
;
755 /* complete single byte char */
756 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
758 /* find start byte of multibyte char */
759 while (--len
&& !(buffer
->data
[len
] & 0xc0))
765 /* Returns byte length of complete char sequence for buffer code page,
766 it's relative to current buffer position which is currently used for BOM handling
768 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
770 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
773 if (readerinput
->buffer
->code_page
== CP_UTF8
)
774 len
= readerinput_get_utf8_convlen(readerinput
);
776 len
= buffer
->written
;
778 TRACE("%d\n", len
- buffer
->cur
);
779 return len
- buffer
->cur
;
782 /* It's possible that raw buffer has some leftovers from last conversion - some char
783 sequence that doesn't represent a full code point. Length argument should be calculated with
784 readerinput_get_convlen(), if it's -1 it will be calculated here. */
785 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
787 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
790 len
= readerinput_get_convlen(readerinput
);
792 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
793 /* everything below cur is lost too */
794 buffer
->written
-= len
+ buffer
->cur
;
795 /* after this point we don't need cur offset really,
796 it's used only to mark where actual data begins when first chunk is read */
800 /* note that raw buffer content is kept */
801 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
803 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
804 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
810 hr
= get_code_page(enc
, &cp
);
811 if (FAILED(hr
)) return;
813 readerinput
->buffer
->code_page
= cp
;
814 len
= readerinput_get_convlen(readerinput
);
816 TRACE("switching to cp %d\n", cp
);
818 /* just copy in this case */
819 if (enc
== XmlEncoding_UTF16
)
821 readerinput_grow(readerinput
, len
);
822 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
823 dest
->written
+= len
*sizeof(WCHAR
);
827 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
828 readerinput_grow(readerinput
, dest_len
);
829 ptr
= (WCHAR
*)dest
->data
;
830 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
832 dest
->written
+= dest_len
*sizeof(WCHAR
);
835 /* shrinks parsed data a buffer begins with */
836 static void reader_shrink(xmlreader
*reader
)
838 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
840 /* avoid to move too often using threshold shrink length */
841 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
843 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
844 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
846 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
850 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
851 It won't attempt to shrink but will grow destination buffer if needed */
852 static HRESULT
reader_more(xmlreader
*reader
)
854 xmlreaderinput
*readerinput
= reader
->input
;
855 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
856 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
857 UINT cp
= readerinput
->buffer
->code_page
;
862 /* get some raw data from stream first */
863 hr
= readerinput_growraw(readerinput
);
864 len
= readerinput_get_convlen(readerinput
);
866 /* just copy for UTF-16 case */
869 readerinput_grow(readerinput
, len
);
870 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
871 dest
->written
+= len
*sizeof(WCHAR
);
875 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
876 readerinput_grow(readerinput
, dest_len
);
877 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
878 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
880 dest
->written
+= dest_len
*sizeof(WCHAR
);
881 /* get rid of processed data */
882 readerinput_shrinkraw(readerinput
, len
);
887 static inline UINT
reader_get_cur(xmlreader
*reader
)
889 return reader
->input
->buffer
->utf16
.cur
;
892 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
894 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
895 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
896 if (!*ptr
) reader_more(reader
);
897 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
900 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
902 const WCHAR
*ptr
= reader_get_ptr(reader
);
903 return strncmpW(str
, ptr
, strlenW(str
));
906 /* moves cursor n WCHARs forward */
907 static void reader_skipn(xmlreader
*reader
, int n
)
909 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
910 const WCHAR
*ptr
= reader_get_ptr(reader
);
912 while (*ptr
++ && n
--)
919 static inline BOOL
is_wchar_space(WCHAR ch
)
921 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
924 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
925 static int reader_skipspaces(xmlreader
*reader
)
927 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
928 const WCHAR
*ptr
= reader_get_ptr(reader
);
929 UINT start
= reader_get_cur(reader
);
931 while (is_wchar_space(*ptr
))
935 else if (*ptr
== '\n')
944 ptr
= reader_get_ptr(reader
);
947 return reader_get_cur(reader
) - start
;
950 /* [26] VersionNum ::= '1.' [0-9]+ */
951 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
953 static const WCHAR onedotW
[] = {'1','.',0};
957 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
959 start
= reader_get_cur(reader
);
961 reader_skipn(reader
, 2);
963 ptr2
= ptr
= reader_get_ptr(reader
);
964 while (*ptr
>= '0' && *ptr
<= '9')
966 reader_skipn(reader
, 1);
967 ptr
= reader_get_ptr(reader
);
970 if (ptr2
== ptr
) return WC_E_DIGIT
;
971 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
972 TRACE("version=%s\n", debug_strval(reader
, val
));
976 /* [25] Eq ::= S? '=' S? */
977 static HRESULT
reader_parse_eq(xmlreader
*reader
)
979 static const WCHAR eqW
[] = {'=',0};
980 reader_skipspaces(reader
);
981 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
983 reader_skipn(reader
, 1);
984 reader_skipspaces(reader
);
988 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
989 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
991 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
995 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
997 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
998 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1000 reader_skipn(reader
, 7);
1002 hr
= reader_parse_eq(reader
);
1003 if (FAILED(hr
)) return hr
;
1005 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1008 reader_skipn(reader
, 1);
1010 hr
= reader_parse_versionnum(reader
, &val
);
1011 if (FAILED(hr
)) return hr
;
1013 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1017 reader_skipn(reader
, 1);
1019 return reader_add_attr(reader
, &name
, &val
);
1022 /* ([A-Za-z0-9._] | '-') */
1023 static inline BOOL
is_wchar_encname(WCHAR ch
)
1025 return ((ch
>= 'A' && ch
<= 'Z') ||
1026 (ch
>= 'a' && ch
<= 'z') ||
1027 (ch
>= '0' && ch
<= '9') ||
1028 (ch
== '.') || (ch
== '_') ||
1032 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1033 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1035 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1039 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1040 return WC_E_ENCNAME
;
1042 val
->start
= reader_get_cur(reader
);
1045 while (is_wchar_encname(*++ptr
))
1049 enc
= parse_encoding_name(start
, len
);
1050 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1054 if (enc
== XmlEncoding_Unknown
)
1055 return WC_E_ENCNAME
;
1057 /* skip encoding name */
1058 reader_skipn(reader
, len
);
1062 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1063 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1065 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1069 if (!reader_skipspaces(reader
)) return S_FALSE
;
1071 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1072 name
.str
= reader_get_ptr(reader
);
1073 name
.start
= reader_get_cur(reader
);
1075 /* skip 'encoding' */
1076 reader_skipn(reader
, 8);
1078 hr
= reader_parse_eq(reader
);
1079 if (FAILED(hr
)) return hr
;
1081 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1084 reader_skipn(reader
, 1);
1086 hr
= reader_parse_encname(reader
, &val
);
1087 if (FAILED(hr
)) return hr
;
1089 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1093 reader_skipn(reader
, 1);
1095 return reader_add_attr(reader
, &name
, &val
);
1098 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1099 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1101 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1102 static const WCHAR yesW
[] = {'y','e','s',0};
1103 static const WCHAR noW
[] = {'n','o',0};
1108 if (!reader_skipspaces(reader
)) return S_FALSE
;
1110 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1111 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1112 /* skip 'standalone' */
1113 reader_skipn(reader
, 10);
1115 hr
= reader_parse_eq(reader
);
1116 if (FAILED(hr
)) return hr
;
1118 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1121 reader_skipn(reader
, 1);
1123 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1124 return WC_E_XMLDECL
;
1126 start
= reader_get_cur(reader
);
1127 /* skip 'yes'|'no' */
1128 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1129 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1130 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1132 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1135 reader_skipn(reader
, 1);
1137 return reader_add_attr(reader
, &name
, &val
);
1140 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1141 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1143 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1144 static const WCHAR declcloseW
[] = {'?','>',0};
1147 /* check if we have "<?xml " */
1148 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1150 reader_skipn(reader
, 5);
1151 hr
= reader_parse_versioninfo(reader
);
1155 hr
= reader_parse_encdecl(reader
);
1159 hr
= reader_parse_sddecl(reader
);
1163 reader_skipspaces(reader
);
1164 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1165 reader_skipn(reader
, 2);
1167 reader_inc_depth(reader
);
1168 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1169 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1170 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1171 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1176 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1177 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1182 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1184 start
= reader
->resume
[XmlReadResume_Body
];
1185 ptr
= reader_get_ptr(reader
);
1190 reader_skipn(reader
, 4);
1191 reader_shrink(reader
);
1192 ptr
= reader_get_ptr(reader
);
1193 start
= reader_get_cur(reader
);
1194 reader
->nodetype
= XmlNodeType_Comment
;
1195 reader
->resume
[XmlReadResume_Body
] = start
;
1196 reader
->resumestate
= XmlReadResumeState_Comment
;
1197 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1198 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1199 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1202 /* will exit when there's no more data, it won't attempt to
1203 read more from stream */
1214 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1215 TRACE("%s\n", debug_strval(reader
, &value
));
1217 /* skip rest of markup '->' */
1218 reader_skipn(reader
, 3);
1220 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1221 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1222 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1223 reader
->resume
[XmlReadResume_Body
] = 0;
1224 reader
->resumestate
= XmlReadResumeState_Initial
;
1228 return WC_E_COMMENT
;
1232 reader_skipn(reader
, 1);
1239 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1240 static inline BOOL
is_char(WCHAR ch
)
1242 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1243 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1244 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1245 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1246 (ch
>= 0xe000 && ch
<= 0xfffd);
1249 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1250 static inline BOOL
is_pubchar(WCHAR ch
)
1252 return (ch
== ' ') ||
1253 (ch
>= 'a' && ch
<= 'z') ||
1254 (ch
>= 'A' && ch
<= 'Z') ||
1255 (ch
>= '0' && ch
<= '9') ||
1256 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1257 (ch
== '=') || (ch
== '?') ||
1258 (ch
== '@') || (ch
== '!') ||
1259 (ch
>= '#' && ch
<= '%') || /* #$% */
1260 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1263 static inline BOOL
is_namestartchar(WCHAR ch
)
1265 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1266 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1267 (ch
>= 0xc0 && ch
<= 0xd6) ||
1268 (ch
>= 0xd8 && ch
<= 0xf6) ||
1269 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1270 (ch
>= 0x370 && ch
<= 0x37d) ||
1271 (ch
>= 0x37f && ch
<= 0x1fff) ||
1272 (ch
>= 0x200c && ch
<= 0x200d) ||
1273 (ch
>= 0x2070 && ch
<= 0x218f) ||
1274 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1275 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1276 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1277 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1278 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1279 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1282 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1283 static inline BOOL
is_ncnamechar(WCHAR ch
)
1285 return (ch
>= 'A' && ch
<= 'Z') ||
1286 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1287 (ch
== '-') || (ch
== '.') ||
1288 (ch
>= '0' && ch
<= '9') ||
1290 (ch
>= 0xc0 && ch
<= 0xd6) ||
1291 (ch
>= 0xd8 && ch
<= 0xf6) ||
1292 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1293 (ch
>= 0x300 && ch
<= 0x36f) ||
1294 (ch
>= 0x370 && ch
<= 0x37d) ||
1295 (ch
>= 0x37f && ch
<= 0x1fff) ||
1296 (ch
>= 0x200c && ch
<= 0x200d) ||
1297 (ch
>= 0x203f && ch
<= 0x2040) ||
1298 (ch
>= 0x2070 && ch
<= 0x218f) ||
1299 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1300 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1301 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1302 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1303 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1304 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1307 static inline BOOL
is_namechar(WCHAR ch
)
1309 return (ch
== ':') || is_ncnamechar(ch
);
1312 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1314 /* When we're on attribute always return attribute type, container node type is kept.
1315 Note that container is not necessarily an element, and attribute doesn't mean it's
1316 an attribute in XML spec terms. */
1317 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1320 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1321 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1322 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1323 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1324 [5] Name ::= NameStartChar (NameChar)* */
1325 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1330 if (reader
->resume
[XmlReadResume_Name
])
1332 start
= reader
->resume
[XmlReadResume_Name
];
1333 ptr
= reader_get_ptr(reader
);
1337 ptr
= reader_get_ptr(reader
);
1338 start
= reader_get_cur(reader
);
1339 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1342 while (is_namechar(*ptr
))
1344 reader_skipn(reader
, 1);
1345 ptr
= reader_get_ptr(reader
);
1348 if (is_reader_pending(reader
))
1350 reader
->resume
[XmlReadResume_Name
] = start
;
1354 reader
->resume
[XmlReadResume_Name
] = 0;
1356 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1357 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1362 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1363 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1365 static const WCHAR xmlW
[] = {'x','m','l'};
1366 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1372 hr
= reader_parse_name(reader
, &name
);
1373 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1375 /* now that we got name check for illegal content */
1376 if (strval_eq(reader
, &name
, &xmlval
))
1377 return WC_E_LEADINGXML
;
1379 /* PITarget can't be a qualified name */
1380 ptr
= reader_get_strptr(reader
, &name
);
1381 for (i
= 0; i
< name
.len
; i
++)
1383 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1385 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1390 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1391 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1398 switch (reader
->resumestate
)
1400 case XmlReadResumeState_Initial
:
1402 reader_skipn(reader
, 2);
1403 reader_shrink(reader
);
1404 reader
->resumestate
= XmlReadResumeState_PITarget
;
1405 case XmlReadResumeState_PITarget
:
1406 hr
= reader_parse_pitarget(reader
, &target
);
1407 if (FAILED(hr
)) return hr
;
1408 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1409 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1410 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1411 reader
->resumestate
= XmlReadResumeState_PIBody
;
1412 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1417 start
= reader
->resume
[XmlReadResume_Body
];
1418 ptr
= reader_get_ptr(reader
);
1425 UINT cur
= reader_get_cur(reader
);
1428 /* strip all leading whitespace chars */
1431 ptr
= reader_get_ptr2(reader
, start
);
1432 if (!is_wchar_space(*ptr
)) break;
1436 reader_init_strvalue(start
, cur
-start
, &value
);
1439 reader_skipn(reader
, 2);
1440 TRACE("%s\n", debug_strval(reader
, &value
));
1441 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1442 reader
->resumestate
= XmlReadResumeState_Initial
;
1443 reader
->resume
[XmlReadResume_Body
] = 0;
1444 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1449 reader_skipn(reader
, 1);
1450 ptr
= reader_get_ptr(reader
);
1456 /* This one is used to parse significant whitespace nodes, like in Misc production */
1457 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1459 switch (reader
->resumestate
)
1461 case XmlReadResumeState_Initial
:
1462 reader_shrink(reader
);
1463 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1464 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1465 reader
->nodetype
= XmlNodeType_Whitespace
;
1466 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1467 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1468 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1470 case XmlReadResumeState_Whitespace
:
1475 reader_skipspaces(reader
);
1476 if (is_reader_pending(reader
)) return S_OK
;
1478 start
= reader
->resume
[XmlReadResume_Body
];
1479 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1480 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1481 TRACE("%s\n", debug_strval(reader
, &value
));
1482 reader
->resumestate
= XmlReadResumeState_Initial
;
1491 /* [27] Misc ::= Comment | PI | S */
1492 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1494 HRESULT hr
= S_FALSE
;
1496 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1498 hr
= reader_more(reader
);
1499 if (FAILED(hr
)) return hr
;
1501 /* finish current node */
1502 switch (reader
->resumestate
)
1504 case XmlReadResumeState_PITarget
:
1505 case XmlReadResumeState_PIBody
:
1506 return reader_parse_pi(reader
);
1507 case XmlReadResumeState_Comment
:
1508 return reader_parse_comment(reader
);
1509 case XmlReadResumeState_Whitespace
:
1510 return reader_parse_whitespace(reader
);
1512 ERR("unknown resume state %d\n", reader
->resumestate
);
1518 const WCHAR
*cur
= reader_get_ptr(reader
);
1520 if (is_wchar_space(*cur
))
1521 hr
= reader_parse_whitespace(reader
);
1522 else if (!reader_cmp(reader
, commentW
))
1523 hr
= reader_parse_comment(reader
);
1524 else if (!reader_cmp(reader
, piW
))
1525 hr
= reader_parse_pi(reader
);
1529 if (hr
!= S_FALSE
) return hr
;
1535 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1536 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1538 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1541 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1544 reader_skipn(reader
, 1);
1546 cur
= reader_get_ptr(reader
);
1547 start
= reader_get_cur(reader
);
1548 while (is_char(*cur
) && *cur
!= quote
)
1550 reader_skipn(reader
, 1);
1551 cur
= reader_get_ptr(reader
);
1553 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1554 if (*cur
== quote
) reader_skipn(reader
, 1);
1556 TRACE("%s\n", debug_strval(reader
, literal
));
1560 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1561 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1562 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1564 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1567 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1570 reader_skipn(reader
, 1);
1572 start
= reader_get_cur(reader
);
1573 cur
= reader_get_ptr(reader
);
1574 while (is_pubchar(*cur
) && *cur
!= quote
)
1576 reader_skipn(reader
, 1);
1577 cur
= reader_get_ptr(reader
);
1580 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1581 TRACE("%s\n", debug_strval(reader
, literal
));
1585 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1586 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1588 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1589 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1594 if (reader_cmp(reader
, systemW
))
1596 if (reader_cmp(reader
, publicW
))
1603 reader_skipn(reader
, 6);
1604 cnt
= reader_skipspaces(reader
);
1605 if (!cnt
) return WC_E_WHITESPACE
;
1607 hr
= reader_parse_pub_literal(reader
, &pub
);
1608 if (FAILED(hr
)) return hr
;
1610 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1611 return reader_add_attr(reader
, &name
, &pub
);
1619 reader_skipn(reader
, 6);
1620 cnt
= reader_skipspaces(reader
);
1621 if (!cnt
) return WC_E_WHITESPACE
;
1623 hr
= reader_parse_sys_literal(reader
, &sys
);
1624 if (FAILED(hr
)) return hr
;
1626 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1627 return reader_add_attr(reader
, &name
, &sys
);
1633 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1634 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1636 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1641 /* check if we have "<!DOCTYPE" */
1642 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1643 reader_shrink(reader
);
1645 /* DTD processing is not allowed by default */
1646 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1648 reader_skipn(reader
, 9);
1649 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1652 hr
= reader_parse_name(reader
, &name
);
1653 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1655 reader_skipspaces(reader
);
1657 hr
= reader_parse_externalid(reader
);
1658 if (FAILED(hr
)) return hr
;
1660 reader_skipspaces(reader
);
1662 cur
= reader_get_ptr(reader
);
1665 FIXME("internal subset parsing not implemented\n");
1670 reader_skipn(reader
, 1);
1672 reader
->nodetype
= XmlNodeType_DocumentType
;
1673 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1674 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1679 /* [11 NS] LocalPart ::= NCName */
1680 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1685 if (reader
->resume
[XmlReadResume_Local
])
1687 start
= reader
->resume
[XmlReadResume_Local
];
1688 ptr
= reader_get_ptr(reader
);
1692 ptr
= reader_get_ptr(reader
);
1693 start
= reader_get_cur(reader
);
1696 while (is_ncnamechar(*ptr
))
1698 reader_skipn(reader
, 1);
1699 ptr
= reader_get_ptr(reader
);
1702 if (is_reader_pending(reader
))
1704 reader
->resume
[XmlReadResume_Local
] = start
;
1708 reader
->resume
[XmlReadResume_Local
] = 0;
1710 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1715 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1716 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1717 [9 NS] UnprefixedName ::= LocalPart
1718 [10 NS] Prefix ::= NCName */
1719 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1725 if (reader
->resume
[XmlReadResume_Name
])
1727 start
= reader
->resume
[XmlReadResume_Name
];
1728 ptr
= reader_get_ptr(reader
);
1732 ptr
= reader_get_ptr(reader
);
1733 start
= reader_get_cur(reader
);
1734 reader
->resume
[XmlReadResume_Name
] = start
;
1735 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1738 if (reader
->resume
[XmlReadResume_Local
])
1740 hr
= reader_parse_local(reader
, local
);
1741 if (FAILED(hr
)) return hr
;
1743 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1744 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1749 /* skip prefix part */
1750 while (is_ncnamechar(*ptr
))
1752 reader_skipn(reader
, 1);
1753 ptr
= reader_get_ptr(reader
);
1756 if (is_reader_pending(reader
)) return E_PENDING
;
1758 /* got a qualified name */
1761 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1764 reader_skipn(reader
, 1);
1765 hr
= reader_parse_local(reader
, local
);
1766 if (FAILED(hr
)) return hr
;
1770 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1771 reader_init_strvalue(0, 0, prefix
);
1775 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1778 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1780 TRACE("ncname %s\n", debug_strval(reader
, local
));
1782 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1784 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1787 reader
->resume
[XmlReadResume_Name
] = 0;
1788 reader
->resume
[XmlReadResume_Local
] = 0;
1793 /* Applies normalization rules to a single char, used for attribute values.
1795 Rules include 2 steps:
1797 1) replacing \r\n with a single \n;
1798 2) replacing all whitespace chars with ' '.
1801 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1803 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1805 if (!is_wchar_space(*ptr
)) return;
1807 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1809 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1810 memmove(ptr
+1, ptr
+2, len
);
1815 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
1817 static const WCHAR entltW
[] = {'l','t'};
1818 static const WCHAR entgtW
[] = {'g','t'};
1819 static const WCHAR entampW
[] = {'a','m','p'};
1820 static const WCHAR entaposW
[] = {'a','p','o','s'};
1821 static const WCHAR entquotW
[] = {'q','u','o','t'};
1822 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1823 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1824 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1825 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1826 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1827 WCHAR
*str
= reader_get_strptr(reader
, name
);
1832 if (strval_eq(reader
, name
, <
)) return '<';
1835 if (strval_eq(reader
, name
, >
)) return '>';
1838 if (strval_eq(reader
, name
, &
))
1840 else if (strval_eq(reader
, name
, &apos
))
1844 if (strval_eq(reader
, name
, "
)) return '\"';
1853 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1854 [67] Reference ::= EntityRef | CharRef
1855 [68] EntityRef ::= '&' Name ';' */
1856 static HRESULT
reader_parse_reference(xmlreader
*reader
)
1858 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1859 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1860 UINT cur
= reader_get_cur(reader
);
1865 reader_skipn(reader
, 1);
1866 ptr
= reader_get_ptr(reader
);
1870 reader_skipn(reader
, 1);
1871 ptr
= reader_get_ptr(reader
);
1873 /* hex char or decimal */
1876 reader_skipn(reader
, 1);
1877 ptr
= reader_get_ptr(reader
);
1881 if ((*ptr
>= '0' && *ptr
<= '9'))
1882 ch
= ch
*16 + *ptr
- '0';
1883 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
1884 ch
= ch
*16 + *ptr
- 'a' + 10;
1885 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
1886 ch
= ch
*16 + *ptr
- 'A' + 10;
1888 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
1889 reader_skipn(reader
, 1);
1890 ptr
= reader_get_ptr(reader
);
1897 if ((*ptr
>= '0' && *ptr
<= '9'))
1899 ch
= ch
*10 + *ptr
- '0';
1900 reader_skipn(reader
, 1);
1901 ptr
= reader_get_ptr(reader
);
1904 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
1908 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
1911 if (is_wchar_space(ch
)) ch
= ' ';
1913 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1914 memmove(start
+1, ptr
+1, len
);
1915 buffer
->cur
= cur
+ 1;
1924 hr
= reader_parse_name(reader
, &name
);
1925 if (FAILED(hr
)) return hr
;
1927 ptr
= reader_get_ptr(reader
);
1928 if (*ptr
!= ';') return WC_E_SEMICOLON
;
1930 /* predefined entities resolve to a single character */
1931 ch
= get_predefined_entity(reader
, &name
);
1934 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1935 memmove(start
+1, ptr
+1, len
);
1936 buffer
->cur
= cur
+ 1;
1942 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
1943 return WC_E_UNDECLAREDENTITY
;
1951 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1952 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
1957 ptr
= reader_get_ptr(reader
);
1959 /* skip opening quote */
1961 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
1962 reader_skipn(reader
, 1);
1964 ptr
= reader_get_ptr(reader
);
1965 start
= reader_get_cur(reader
);
1968 if (*ptr
== '<') return WC_E_LESSTHAN
;
1972 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
1973 /* skip closing quote */
1974 reader_skipn(reader
, 1);
1980 HRESULT hr
= reader_parse_reference(reader
);
1981 if (FAILED(hr
)) return hr
;
1985 reader_normalize_space(reader
, ptr
);
1986 reader_skipn(reader
, 1);
1988 ptr
= reader_get_ptr(reader
);
1994 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
1995 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
1996 [3 NS] DefaultAttName ::= 'xmlns'
1997 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
1998 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2000 static const WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
2001 strval prefix
, local
, qname
, xmlns
, value
;
2004 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2005 if (FAILED(hr
)) return hr
;
2007 reader_init_cstrvalue((WCHAR
*)xmlnsW
, 5, &xmlns
);
2009 if (strval_eq(reader
, &prefix
, &xmlns
))
2011 FIXME("namespace definitions not supported\n");
2015 if (strval_eq(reader
, &qname
, &xmlns
))
2016 FIXME("default namespace definitions not supported\n");
2018 hr
= reader_parse_eq(reader
);
2019 if (FAILED(hr
)) return hr
;
2021 hr
= reader_parse_attvalue(reader
, &value
);
2022 if (FAILED(hr
)) return hr
;
2024 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2025 return reader_add_attr(reader
, &local
, &value
);
2028 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2029 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2030 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2034 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2035 if (FAILED(hr
)) return hr
;
2039 static const WCHAR endW
[] = {'/','>',0};
2041 reader_skipspaces(reader
);
2044 if ((*empty
= !reader_cmp(reader
, endW
)))
2047 reader_skipn(reader
, 2);
2048 reader
->empty_element
= TRUE
;
2052 /* got a start tag */
2053 if (!reader_cmp(reader
, gtW
))
2056 reader_skipn(reader
, 1);
2057 return reader_push_element(reader
, qname
, local
);
2060 hr
= reader_parse_attribute(reader
);
2061 if (FAILED(hr
)) return hr
;
2067 /* [39] element ::= EmptyElemTag | STag content ETag */
2068 static HRESULT
reader_parse_element(xmlreader
*reader
)
2072 switch (reader
->resumestate
)
2074 case XmlReadResumeState_Initial
:
2075 /* check if we are really on element */
2076 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2079 reader_skipn(reader
, 1);
2081 reader_shrink(reader
);
2082 reader
->resumestate
= XmlReadResumeState_STag
;
2083 case XmlReadResumeState_STag
:
2085 strval qname
, prefix
, local
;
2088 /* this handles empty elements too */
2089 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2090 if (FAILED(hr
)) return hr
;
2092 /* FIXME: need to check for defined namespace to reject invalid prefix,
2093 currently reject all prefixes */
2094 if (prefix
.len
) return NC_E_UNDECLAREDPREFIX
;
2096 /* if we got empty element and stack is empty go straight to Misc */
2097 if (empty
&& list_empty(&reader
->elements
))
2098 reader
->instate
= XmlReadInState_MiscEnd
;
2100 reader
->instate
= XmlReadInState_Content
;
2102 reader
->nodetype
= XmlNodeType_Element
;
2103 reader
->resumestate
= XmlReadResumeState_Initial
;
2104 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2105 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2106 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2116 /* [13 NS] ETag ::= '</' QName S? '>' */
2117 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2119 strval prefix
, local
, qname
;
2120 struct element
*elem
;
2124 reader_skipn(reader
, 2);
2126 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2127 if (FAILED(hr
)) return hr
;
2129 reader_skipspaces(reader
);
2131 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2134 reader_skipn(reader
, 1);
2136 /* Element stack should never be empty at this point, cause we shouldn't get to
2137 content parsing if it's empty. */
2138 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2139 if (!strval_eq(reader
, &elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2141 reader_pop_element(reader
);
2143 /* It was a root element, the rest is expected as Misc */
2144 if (list_empty(&reader
->elements
))
2145 reader
->instate
= XmlReadInState_MiscEnd
;
2147 reader
->nodetype
= XmlNodeType_EndElement
;
2148 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2149 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2154 /* [18] CDSect ::= CDStart CData CDEnd
2155 [19] CDStart ::= '<![CDATA['
2156 [20] CData ::= (Char* - (Char* ']]>' Char*))
2157 [21] CDEnd ::= ']]>' */
2158 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2163 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2165 start
= reader
->resume
[XmlReadResume_Body
];
2166 ptr
= reader_get_ptr(reader
);
2170 /* skip markup '<![CDATA[' */
2171 reader_skipn(reader
, 9);
2172 reader_shrink(reader
);
2173 ptr
= reader_get_ptr(reader
);
2174 start
= reader_get_cur(reader
);
2175 reader
->nodetype
= XmlNodeType_CDATA
;
2176 reader
->resume
[XmlReadResume_Body
] = start
;
2177 reader
->resumestate
= XmlReadResumeState_CDATA
;
2178 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2179 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2180 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2185 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2189 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2192 reader_skipn(reader
, 3);
2193 TRACE("%s\n", debug_strval(reader
, &value
));
2195 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2196 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2197 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2198 reader
->resume
[XmlReadResume_Body
] = 0;
2199 reader
->resumestate
= XmlReadResumeState_Initial
;
2204 /* Value normalization is not fully implemented, rules are:
2206 - single '\r' -> '\n';
2207 - sequence '\r\n' -> '\n', in this case value length changes;
2209 if (*ptr
== '\r') *ptr
= '\n';
2210 reader_skipn(reader
, 1);
2218 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2219 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2224 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2226 start
= reader
->resume
[XmlReadResume_Body
];
2227 ptr
= reader_get_ptr(reader
);
2231 reader_shrink(reader
);
2232 ptr
= reader_get_ptr(reader
);
2233 start
= reader_get_cur(reader
);
2234 /* There's no text */
2235 if (!*ptr
|| *ptr
== '<') return S_OK
;
2236 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2237 reader
->resume
[XmlReadResume_Body
] = start
;
2238 reader
->resumestate
= XmlReadResumeState_CharData
;
2239 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2240 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2241 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2246 /* CDATA closing sequence ']]>' is not allowed */
2247 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2248 return WC_E_CDSECTEND
;
2250 /* Found next markup part */
2255 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2256 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2257 reader
->resume
[XmlReadResume_Body
] = 0;
2258 reader
->resumestate
= XmlReadResumeState_Initial
;
2262 reader_skipn(reader
, 1);
2264 /* this covers a case when text has leading whitespace chars */
2265 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2272 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2273 static HRESULT
reader_parse_content(xmlreader
*reader
)
2275 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2276 static const WCHAR etagW
[] = {'<','/',0};
2277 static const WCHAR ampW
[] = {'&',0};
2279 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2281 switch (reader
->resumestate
)
2283 case XmlReadResumeState_CDATA
:
2284 return reader_parse_cdata(reader
);
2285 case XmlReadResumeState_Comment
:
2286 return reader_parse_comment(reader
);
2287 case XmlReadResumeState_PIBody
:
2288 case XmlReadResumeState_PITarget
:
2289 return reader_parse_pi(reader
);
2290 case XmlReadResumeState_CharData
:
2291 return reader_parse_chardata(reader
);
2293 ERR("unknown resume state %d\n", reader
->resumestate
);
2297 reader_shrink(reader
);
2299 /* handle end tag here, it indicates end of content as well */
2300 if (!reader_cmp(reader
, etagW
))
2301 return reader_parse_endtag(reader
);
2303 if (!reader_cmp(reader
, commentW
))
2304 return reader_parse_comment(reader
);
2306 if (!reader_cmp(reader
, piW
))
2307 return reader_parse_pi(reader
);
2309 if (!reader_cmp(reader
, cdstartW
))
2310 return reader_parse_cdata(reader
);
2312 if (!reader_cmp(reader
, ampW
))
2313 return reader_parse_reference(reader
);
2315 if (!reader_cmp(reader
, ltW
))
2316 return reader_parse_element(reader
);
2318 /* what's left must be CharData */
2319 return reader_parse_chardata(reader
);
2322 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2326 if (!is_reader_pending(reader
))
2327 reader_clear_attrs(reader
);
2331 switch (reader
->instate
)
2333 /* if it's a first call for a new input we need to detect stream encoding */
2334 case XmlReadInState_Initial
:
2338 hr
= readerinput_growraw(reader
->input
);
2339 if (FAILED(hr
)) return hr
;
2341 /* try to detect encoding by BOM or data and set input code page */
2342 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2343 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2344 if (FAILED(hr
)) return hr
;
2346 /* always switch first time cause we have to put something in */
2347 readerinput_switchencoding(reader
->input
, enc
);
2349 /* parse xml declaration */
2350 hr
= reader_parse_xmldecl(reader
);
2351 if (FAILED(hr
)) return hr
;
2353 readerinput_shrinkraw(reader
->input
, -1);
2354 reader
->instate
= XmlReadInState_Misc_DTD
;
2355 if (hr
== S_OK
) return hr
;
2358 case XmlReadInState_Misc_DTD
:
2359 hr
= reader_parse_misc(reader
);
2360 if (FAILED(hr
)) return hr
;
2363 reader
->instate
= XmlReadInState_DTD
;
2367 case XmlReadInState_DTD
:
2368 hr
= reader_parse_dtd(reader
);
2369 if (FAILED(hr
)) return hr
;
2373 reader
->instate
= XmlReadInState_DTD_Misc
;
2377 reader
->instate
= XmlReadInState_Element
;
2379 case XmlReadInState_DTD_Misc
:
2380 hr
= reader_parse_misc(reader
);
2381 if (FAILED(hr
)) return hr
;
2384 reader
->instate
= XmlReadInState_Element
;
2388 case XmlReadInState_Element
:
2389 return reader_parse_element(reader
);
2390 case XmlReadInState_Content
:
2391 return reader_parse_content(reader
);
2392 case XmlReadInState_MiscEnd
:
2393 hr
= reader_parse_misc(reader
);
2394 if (FAILED(hr
)) return hr
;
2397 reader
->instate
= XmlReadInState_Eof
;
2399 case XmlReadInState_Eof
:
2402 FIXME("internal state %d not handled\n", reader
->instate
);
2410 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2412 xmlreader
*This
= impl_from_IXmlReader(iface
);
2414 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2416 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2417 IsEqualGUID(riid
, &IID_IXmlReader
))
2423 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2425 return E_NOINTERFACE
;
2428 IXmlReader_AddRef(iface
);
2433 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2435 xmlreader
*This
= impl_from_IXmlReader(iface
);
2436 ULONG ref
= InterlockedIncrement(&This
->ref
);
2437 TRACE("(%p)->(%d)\n", This
, ref
);
2441 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2443 xmlreader
*This
= impl_from_IXmlReader(iface
);
2444 LONG ref
= InterlockedDecrement(&This
->ref
);
2446 TRACE("(%p)->(%d)\n", This
, ref
);
2450 IMalloc
*imalloc
= This
->imalloc
;
2451 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2452 reader_clear_attrs(This
);
2453 reader_clear_elements(This
);
2454 reader_free_strvalues(This
);
2455 reader_free(This
, This
);
2456 if (imalloc
) IMalloc_Release(imalloc
);
2462 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2464 xmlreader
*This
= impl_from_IXmlReader(iface
);
2465 IXmlReaderInput
*readerinput
;
2468 TRACE("(%p)->(%p)\n", This
, input
);
2472 readerinput_release_stream(This
->input
);
2473 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2477 This
->line
= This
->pos
= 0;
2478 reader_clear_elements(This
);
2480 This
->resumestate
= XmlReadResumeState_Initial
;
2481 memset(This
->resume
, 0, sizeof(This
->resume
));
2483 /* just reset current input */
2486 This
->state
= XmlReadState_Initial
;
2490 /* now try IXmlReaderInput, ISequentialStream, IStream */
2491 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2494 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2495 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2498 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2499 readerinput
, readerinput
->lpVtbl
);
2500 IUnknown_Release(readerinput
);
2506 if (hr
!= S_OK
|| !readerinput
)
2508 /* create IXmlReaderInput basing on supplied interface */
2509 hr
= CreateXmlReaderInputWithEncodingName(input
,
2510 NULL
, NULL
, FALSE
, NULL
, &readerinput
);
2511 if (hr
!= S_OK
) return hr
;
2512 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2515 /* set stream for supplied IXmlReaderInput */
2516 hr
= readerinput_query_for_stream(This
->input
);
2519 This
->state
= XmlReadState_Initial
;
2520 This
->instate
= XmlReadInState_Initial
;
2526 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2528 xmlreader
*This
= impl_from_IXmlReader(iface
);
2530 TRACE("(%p)->(%s %p)\n", This
, debugstr_prop(property
), value
);
2532 if (!value
) return E_INVALIDARG
;
2536 case XmlReaderProperty_DtdProcessing
:
2537 *value
= This
->dtdmode
;
2539 case XmlReaderProperty_ReadState
:
2540 *value
= This
->state
;
2543 FIXME("Unimplemented property (%u)\n", property
);
2550 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2552 xmlreader
*This
= impl_from_IXmlReader(iface
);
2554 TRACE("(%p)->(%s %lu)\n", This
, debugstr_prop(property
), value
);
2558 case XmlReaderProperty_DtdProcessing
:
2559 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2560 This
->dtdmode
= value
;
2563 FIXME("Unimplemented property (%u)\n", property
);
2570 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2572 xmlreader
*This
= impl_from_IXmlReader(iface
);
2573 XmlNodeType oldtype
= This
->nodetype
;
2576 TRACE("(%p)->(%p)\n", This
, nodetype
);
2578 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2580 hr
= reader_parse_nextnode(This
);
2581 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2582 This
->state
= XmlReadState_Interactive
;
2585 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2586 *nodetype
= This
->nodetype
;
2592 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2594 xmlreader
*This
= impl_from_IXmlReader(iface
);
2595 TRACE("(%p)->(%p)\n", This
, node_type
);
2597 *node_type
= reader_get_nodetype(This
);
2598 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2601 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2603 xmlreader
*This
= impl_from_IXmlReader(iface
);
2605 TRACE("(%p)\n", This
);
2607 if (!This
->attr_count
) return S_FALSE
;
2608 This
->attr
= LIST_ENTRY(list_head(&This
->attrs
), struct attribute
, entry
);
2609 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2610 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2615 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2617 xmlreader
*This
= impl_from_IXmlReader(iface
);
2618 const struct list
*next
;
2620 TRACE("(%p)\n", This
);
2622 if (!This
->attr_count
) return S_FALSE
;
2625 return IXmlReader_MoveToFirstAttribute(iface
);
2627 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2630 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2631 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2632 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2635 return next
? S_OK
: S_FALSE
;
2638 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2640 LPCWSTR namespaceUri
)
2642 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
2646 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2648 xmlreader
*This
= impl_from_IXmlReader(iface
);
2649 struct element
*elem
;
2651 TRACE("(%p)\n", This
);
2653 if (!This
->attr_count
) return S_FALSE
;
2656 /* FIXME: support other node types with 'attributes' like DTD */
2657 elem
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2660 reader_set_strvalue(This
, StringValue_QualifiedName
, &elem
->qname
);
2661 reader_set_strvalue(This
, StringValue_LocalName
, &elem
->localname
);
2667 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2669 xmlreader
*This
= impl_from_IXmlReader(iface
);
2671 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2672 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2673 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
2677 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
,
2678 LPCWSTR
*namespaceUri
,
2679 UINT
*namespaceUri_length
)
2681 FIXME("(%p %p %p): stub\n", iface
, namespaceUri
, namespaceUri_length
);
2685 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2687 xmlreader
*This
= impl_from_IXmlReader(iface
);
2689 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2690 *name
= This
->strvalues
[StringValue_LocalName
].str
;
2691 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
2695 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
2697 xmlreader
*This
= impl_from_IXmlReader(iface
);
2699 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
2700 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
2701 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
2705 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
2707 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2708 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2710 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
2714 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
2719 hr
= IXmlReader_Read(iface
, &type
);
2720 if (FAILED(hr
)) return hr
;
2722 /* return if still pending, partially read values are not reported */
2723 if (is_reader_pending(reader
)) return E_PENDING
;
2728 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
2729 if (!ptr
) return E_OUTOFMEMORY
;
2730 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
2736 if (len
) *len
= val
->len
;
2740 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
2742 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2743 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2746 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
2748 /* Value is already allocated, chunked reads are not possible. */
2749 if (val
->str
) return S_FALSE
;
2753 len
= min(chunk_size
, val
->len
);
2754 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
2757 if (read
) *read
= len
;
2763 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
2765 UINT
*baseUri_length
)
2767 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
2771 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
2773 FIXME("(%p): stub\n", iface
);
2777 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
2779 xmlreader
*This
= impl_from_IXmlReader(iface
);
2780 TRACE("(%p)\n", This
);
2781 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2782 when current node is start tag of an element */
2783 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->empty_element
: FALSE
;
2786 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
2788 xmlreader
*This
= impl_from_IXmlReader(iface
);
2790 TRACE("(%p %p)\n", This
, lineNumber
);
2792 if (!lineNumber
) return E_INVALIDARG
;
2794 *lineNumber
= This
->line
;
2799 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
2801 xmlreader
*This
= impl_from_IXmlReader(iface
);
2803 TRACE("(%p %p)\n", This
, linePosition
);
2805 if (!linePosition
) return E_INVALIDARG
;
2807 *linePosition
= This
->pos
;
2812 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
2814 xmlreader
*This
= impl_from_IXmlReader(iface
);
2816 TRACE("(%p)->(%p)\n", This
, count
);
2818 if (!count
) return E_INVALIDARG
;
2820 *count
= This
->attr_count
;
2824 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
2826 xmlreader
*This
= impl_from_IXmlReader(iface
);
2827 TRACE("(%p)->(%p)\n", This
, depth
);
2828 *depth
= This
->depth
;
2832 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
2834 FIXME("(%p): stub\n", iface
);
2838 static const struct IXmlReaderVtbl xmlreader_vtbl
=
2840 xmlreader_QueryInterface
,
2844 xmlreader_GetProperty
,
2845 xmlreader_SetProperty
,
2847 xmlreader_GetNodeType
,
2848 xmlreader_MoveToFirstAttribute
,
2849 xmlreader_MoveToNextAttribute
,
2850 xmlreader_MoveToAttributeByName
,
2851 xmlreader_MoveToElement
,
2852 xmlreader_GetQualifiedName
,
2853 xmlreader_GetNamespaceUri
,
2854 xmlreader_GetLocalName
,
2855 xmlreader_GetPrefix
,
2857 xmlreader_ReadValueChunk
,
2858 xmlreader_GetBaseUri
,
2859 xmlreader_IsDefault
,
2860 xmlreader_IsEmptyElement
,
2861 xmlreader_GetLineNumber
,
2862 xmlreader_GetLinePosition
,
2863 xmlreader_GetAttributeCount
,
2868 /** IXmlReaderInput **/
2869 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
2871 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2873 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2875 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
2876 IsEqualGUID(riid
, &IID_IUnknown
))
2882 WARN("interface %s not implemented\n", debugstr_guid(riid
));
2884 return E_NOINTERFACE
;
2887 IUnknown_AddRef(iface
);
2892 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
2894 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2895 ULONG ref
= InterlockedIncrement(&This
->ref
);
2896 TRACE("(%p)->(%d)\n", This
, ref
);
2900 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
2902 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2903 LONG ref
= InterlockedDecrement(&This
->ref
);
2905 TRACE("(%p)->(%d)\n", This
, ref
);
2909 IMalloc
*imalloc
= This
->imalloc
;
2910 if (This
->input
) IUnknown_Release(This
->input
);
2911 if (This
->stream
) ISequentialStream_Release(This
->stream
);
2912 if (This
->buffer
) free_input_buffer(This
->buffer
);
2913 readerinput_free(This
, This
->baseuri
);
2914 readerinput_free(This
, This
);
2915 if (imalloc
) IMalloc_Release(imalloc
);
2921 static const struct IUnknownVtbl xmlreaderinputvtbl
=
2923 xmlreaderinput_QueryInterface
,
2924 xmlreaderinput_AddRef
,
2925 xmlreaderinput_Release
2928 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
2933 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
2935 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
2937 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
2942 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
2944 reader
= heap_alloc(sizeof(*reader
));
2945 if(!reader
) return E_OUTOFMEMORY
;
2947 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
2949 reader
->input
= NULL
;
2950 reader
->state
= XmlReadState_Closed
;
2951 reader
->instate
= XmlReadInState_Initial
;
2952 reader
->resumestate
= XmlReadResumeState_Initial
;
2953 reader
->dtdmode
= DtdProcessing_Prohibit
;
2954 reader
->line
= reader
->pos
= 0;
2955 reader
->imalloc
= imalloc
;
2956 if (imalloc
) IMalloc_AddRef(imalloc
);
2957 reader
->nodetype
= XmlNodeType_None
;
2958 list_init(&reader
->attrs
);
2959 reader
->attr_count
= 0;
2960 reader
->attr
= NULL
;
2961 list_init(&reader
->elements
);
2963 reader
->max_depth
= 256;
2964 reader
->empty_element
= FALSE
;
2965 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2967 for (i
= 0; i
< StringValue_Last
; i
++)
2968 reader
->strvalues
[i
] = strval_empty
;
2970 *obj
= &reader
->IXmlReader_iface
;
2972 TRACE("returning iface %p\n", *obj
);
2977 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
2982 IXmlReaderInput
**ppInput
)
2984 xmlreaderinput
*readerinput
;
2987 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
2988 hint
, wine_dbgstr_w(base_uri
), ppInput
);
2990 if (!stream
|| !ppInput
) return E_INVALIDARG
;
2993 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
2995 readerinput
= heap_alloc(sizeof(*readerinput
));
2996 if(!readerinput
) return E_OUTOFMEMORY
;
2998 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
2999 readerinput
->ref
= 1;
3000 readerinput
->imalloc
= imalloc
;
3001 readerinput
->stream
= NULL
;
3002 if (imalloc
) IMalloc_AddRef(imalloc
);
3003 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3004 readerinput
->hint
= hint
;
3005 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3006 readerinput
->pending
= 0;
3008 hr
= alloc_input_buffer(readerinput
);
3011 readerinput_free(readerinput
, readerinput
->baseuri
);
3012 readerinput_free(readerinput
, readerinput
);
3013 if (imalloc
) IMalloc_Release(imalloc
);
3016 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3018 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3020 TRACE("returning iface %p\n", *ppInput
);