2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
21 #include "xmllite_private.h"
25 #include <wine/list.h>
26 #include <wine/unicode.h>
28 /* not defined in public headers */
29 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
33 XmlReadInState_Initial
,
34 XmlReadInState_XmlDecl
,
35 XmlReadInState_Misc_DTD
,
37 XmlReadInState_DTD_Misc
,
38 XmlReadInState_Element
,
39 XmlReadInState_Content
,
40 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
42 } XmlReaderInternalState
;
44 /* This state denotes where parsing was interrupted by input problem.
45 Reader resumes parsing using this information. */
48 XmlReadResumeState_Initial
,
49 XmlReadResumeState_PITarget
,
50 XmlReadResumeState_PIBody
,
51 XmlReadResumeState_CDATA
,
52 XmlReadResumeState_Comment
,
53 XmlReadResumeState_STag
,
54 XmlReadResumeState_CharData
,
55 XmlReadResumeState_Whitespace
56 } XmlReaderResumeState
;
58 /* saved pointer index to resume from particular input position */
61 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
62 XmlReadResume_Local
, /* local for QName */
63 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
69 StringValue_LocalName
,
71 StringValue_QualifiedName
,
74 } XmlReaderStringValue
;
76 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
77 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
79 static const WCHAR dblquoteW
[] = {'\"',0};
80 static const WCHAR quoteW
[] = {'\'',0};
81 static const WCHAR ltW
[] = {'<',0};
82 static const WCHAR gtW
[] = {'>',0};
83 static const WCHAR commentW
[] = {'<','!','-','-',0};
84 static const WCHAR piW
[] = {'<','?',0};
86 static const char *debugstr_nodetype(XmlNodeType nodetype
)
88 static const char * const type_names
[] =
97 "ProcessingInstruction",
110 if (nodetype
> _XmlNodeType_Last
)
111 return wine_dbg_sprintf("unknown type=%d", nodetype
);
113 return type_names
[nodetype
];
116 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
118 static const char * const prop_names
[] =
130 if (prop
> _XmlReaderProperty_Last
)
131 return wine_dbg_sprintf("unknown property=%d", prop
);
133 return prop_names
[prop
];
136 struct xml_encoding_data
143 static const struct xml_encoding_data xml_encoding_map
[] = {
144 { utf16W
, XmlEncoding_UTF16
, ~0 },
145 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
}
148 const WCHAR
*get_encoding_name(xml_encoding encoding
)
150 return xml_encoding_map
[encoding
].name
;
157 unsigned int allocated
;
158 unsigned int written
;
161 typedef struct input_buffer input_buffer
;
165 IXmlReaderInput IXmlReaderInput_iface
;
167 /* reference passed on IXmlReaderInput creation, is kept when input is created */
170 xml_encoding encoding
;
173 /* stream reference set after SetInput() call from reader,
174 stored as sequential stream, cause currently
175 optimizations possible with IStream aren't implemented */
176 ISequentialStream
*stream
;
177 input_buffer
*buffer
;
178 unsigned int pending
: 1;
181 static const struct IUnknownVtbl xmlreaderinputvtbl
;
183 /* Structure to hold parsed string of specific length.
185 Reader stores node value as 'start' pointer, on request
186 a null-terminated version of it is allocated.
188 To init a strval variable use reader_init_strval(),
189 to set strval as a reader value use reader_set_strval().
193 WCHAR
*str
; /* allocated null-terminated string */
194 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
195 UINT start
; /* input position where value starts */
198 static WCHAR emptyW
[] = {0};
199 static const strval strval_empty
= { emptyW
};
217 IXmlReader IXmlReader_iface
;
219 xmlreaderinput
*input
;
222 XmlReaderInternalState instate
;
223 XmlReaderResumeState resumestate
;
224 XmlNodeType nodetype
;
225 DtdProcessing dtdmode
;
226 UINT line
, pos
; /* reader position in XML stream */
227 struct list attrs
; /* attributes list for current node */
228 struct attribute
*attr
; /* current attribute */
230 struct list elements
;
231 strval strvalues
[StringValue_Last
];
235 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
240 encoded_buffer utf16
;
241 encoded_buffer encoded
;
243 xmlreaderinput
*input
;
246 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
248 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
251 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
253 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
256 /* reader memory allocation functions */
257 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
259 return m_alloc(reader
->imalloc
, len
);
262 static inline void reader_free(xmlreader
*reader
, void *mem
)
264 m_free(reader
->imalloc
, mem
);
267 /* Just return pointer from offset, no attempt to read more. */
268 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
270 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
271 return (WCHAR
*)buffer
->data
+ offset
;
274 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
276 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
279 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
283 if (src
->str
!= strval_empty
.str
)
285 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
286 if (!dest
->str
) return E_OUTOFMEMORY
;
287 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
288 dest
->str
[dest
->len
] = 0;
295 /* reader input memory allocation functions */
296 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
298 return m_alloc(input
->imalloc
, len
);
301 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
303 return m_realloc(input
->imalloc
, mem
, len
);
306 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
308 m_free(input
->imalloc
, mem
);
311 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
318 size
= (strlenW(str
)+1)*sizeof(WCHAR
);
319 ret
= readerinput_alloc(input
, size
);
320 if (ret
) memcpy(ret
, str
, size
);
326 static void reader_clear_attrs(xmlreader
*reader
)
328 struct attribute
*attr
, *attr2
;
329 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
331 reader_free(reader
, attr
);
333 list_init(&reader
->attrs
);
334 reader
->attr_count
= 0;
338 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
339 while we are on a node with attributes */
340 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*localname
, strval
*value
)
342 struct attribute
*attr
;
344 attr
= reader_alloc(reader
, sizeof(*attr
));
345 if (!attr
) return E_OUTOFMEMORY
;
347 attr
->localname
= *localname
;
348 attr
->value
= *value
;
349 list_add_tail(&reader
->attrs
, &attr
->entry
);
350 reader
->attr_count
++;
355 /* This one frees stored string value if needed */
356 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
358 if (v
->str
!= strval_empty
.str
)
360 reader_free(reader
, v
->str
);
365 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
372 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
374 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
377 /* used to initialize from constant string */
378 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
385 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
387 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
390 static void reader_free_strvalues(xmlreader
*reader
)
393 for (type
= 0; type
< StringValue_Last
; type
++)
394 reader_free_strvalue(reader
, type
);
397 /* This helper should only be used to test if strings are the same,
398 it doesn't try to sort. */
399 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
401 if (str1
->len
!= str2
->len
) return 0;
402 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
405 static void reader_clear_elements(xmlreader
*reader
)
407 struct element
*elem
, *elem2
;
408 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
410 reader_free_strvalued(reader
, &elem
->qname
);
411 reader_free(reader
, elem
);
413 list_init(&reader
->elements
);
414 reader
->empty_element
= FALSE
;
417 static HRESULT
reader_inc_depth(xmlreader
*reader
)
419 if (++reader
->depth
> reader
->max_depth
) return SC_E_MAXELEMENTDEPTH
;
423 static void reader_dec_depth(xmlreader
*reader
)
425 if (reader
->depth
> 1) reader
->depth
--;
428 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*qname
, strval
*localname
)
430 struct element
*elem
;
433 elem
= reader_alloc(reader
, sizeof(*elem
));
434 if (!elem
) return E_OUTOFMEMORY
;
436 hr
= reader_strvaldup(reader
, qname
, &elem
->qname
);
438 reader_free(reader
, elem
);
442 hr
= reader_strvaldup(reader
, localname
, &elem
->localname
);
445 reader_free_strvalued(reader
, &elem
->qname
);
446 reader_free(reader
, elem
);
450 if (!list_empty(&reader
->elements
))
452 hr
= reader_inc_depth(reader
);
454 reader_free(reader
, elem
);
459 list_add_head(&reader
->elements
, &elem
->entry
);
460 reader
->empty_element
= FALSE
;
464 static void reader_pop_element(xmlreader
*reader
)
466 struct element
*elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
470 list_remove(&elem
->entry
);
471 reader_free_strvalued(reader
, &elem
->qname
);
472 reader_free_strvalued(reader
, &elem
->localname
);
473 reader_free(reader
, elem
);
474 reader_dec_depth(reader
);
478 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
479 means node value is to be determined. */
480 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
482 strval
*v
= &reader
->strvalues
[type
];
484 reader_free_strvalue(reader
, type
);
493 if (value
->str
== strval_empty
.str
)
497 if (type
== StringValue_Value
)
499 /* defer allocation for value string */
501 v
->start
= value
->start
;
506 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
507 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
508 v
->str
[value
->len
] = 0;
514 static inline int is_reader_pending(xmlreader
*reader
)
516 return reader
->input
->pending
;
519 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
521 const int initial_len
= 0x2000;
522 buffer
->data
= readerinput_alloc(input
, initial_len
);
523 if (!buffer
->data
) return E_OUTOFMEMORY
;
525 memset(buffer
->data
, 0, 4);
527 buffer
->allocated
= initial_len
;
533 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
535 readerinput_free(input
, buffer
->data
);
538 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
540 if (encoding
== XmlEncoding_Unknown
)
542 FIXME("unsupported encoding %d\n", encoding
);
546 *cp
= xml_encoding_map
[encoding
].cp
;
551 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
555 if (!name
) return XmlEncoding_Unknown
;
558 max
= sizeof(xml_encoding_map
)/sizeof(struct xml_encoding_data
) - 1;
565 c
= strncmpiW(xml_encoding_map
[n
].name
, name
, len
);
567 c
= strcmpiW(xml_encoding_map
[n
].name
, name
);
569 return xml_encoding_map
[n
].enc
;
577 return XmlEncoding_Unknown
;
580 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
582 input_buffer
*buffer
;
585 input
->buffer
= NULL
;
587 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
588 if (!buffer
) return E_OUTOFMEMORY
;
590 buffer
->input
= input
;
591 buffer
->code_page
= ~0; /* code page is unknown at this point */
592 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
594 readerinput_free(input
, buffer
);
598 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
600 free_encoded_buffer(input
, &buffer
->utf16
);
601 readerinput_free(input
, buffer
);
605 input
->buffer
= buffer
;
609 static void free_input_buffer(input_buffer
*buffer
)
611 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
612 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
613 readerinput_free(buffer
->input
, buffer
);
616 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
618 if (readerinput
->stream
) {
619 ISequentialStream_Release(readerinput
->stream
);
620 readerinput
->stream
= NULL
;
624 /* Queries already stored interface for IStream/ISequentialStream.
625 Interface supplied on creation will be overwritten */
626 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
630 readerinput_release_stream(readerinput
);
631 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
633 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
638 /* reads a chunk to raw buffer */
639 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
641 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
642 /* to make sure aligned length won't exceed allocated length */
643 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
647 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
648 variable width encodings like UTF-8 */
649 len
= (len
+ 3) & ~3;
650 /* try to use allocated space or grow */
651 if (buffer
->allocated
- buffer
->written
< len
)
653 buffer
->allocated
*= 2;
654 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
655 len
= buffer
->allocated
- buffer
->written
;
659 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
660 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
661 readerinput
->pending
= hr
== E_PENDING
;
662 if (FAILED(hr
)) return hr
;
663 buffer
->written
+= read
;
668 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
669 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
671 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
673 length
*= sizeof(WCHAR
);
674 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
675 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
677 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
678 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
679 buffer
->allocated
= grown_size
;
683 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
685 static const char startA
[] = {'<','?'};
686 static const char commentA
[] = {'<','!'};
687 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
688 unsigned char *ptr
= (unsigned char*)buffer
->data
;
690 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
691 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
692 /* test start byte */
695 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
696 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
697 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
698 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
702 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
704 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
705 static const WCHAR startW
[] = {'<','?'};
706 static const WCHAR commentW
[] = {'<','!'};
707 static const char utf8bom
[] = {0xef,0xbb,0xbf};
708 static const char utf16lebom
[] = {0xff,0xfe};
710 *enc
= XmlEncoding_Unknown
;
712 if (buffer
->written
<= 3)
714 HRESULT hr
= readerinput_growraw(readerinput
);
715 if (FAILED(hr
)) return hr
;
716 if (buffer
->written
<= 3) return MX_E_INPUTEND
;
719 /* try start symbols if we have enough data to do that, input buffer should contain
720 first chunk already */
721 if (readerinput_is_utf8(readerinput
))
722 *enc
= XmlEncoding_UTF8
;
723 else if (!memcmp(buffer
->data
, startW
, sizeof(startW
)) ||
724 !memcmp(buffer
->data
, commentW
, sizeof(commentW
)))
725 *enc
= XmlEncoding_UTF16
;
726 /* try with BOM now */
727 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
729 buffer
->cur
+= sizeof(utf8bom
);
730 *enc
= XmlEncoding_UTF8
;
732 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
734 buffer
->cur
+= sizeof(utf16lebom
);
735 *enc
= XmlEncoding_UTF16
;
741 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
743 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
744 int len
= buffer
->written
;
746 /* complete single byte char */
747 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
749 /* find start byte of multibyte char */
750 while (--len
&& !(buffer
->data
[len
] & 0xc0))
756 /* Returns byte length of complete char sequence for buffer code page,
757 it's relative to current buffer position which is currently used for BOM handling
759 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
761 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
764 if (readerinput
->buffer
->code_page
== CP_UTF8
)
765 len
= readerinput_get_utf8_convlen(readerinput
);
767 len
= buffer
->written
;
769 TRACE("%d\n", len
- buffer
->cur
);
770 return len
- buffer
->cur
;
773 /* It's possible that raw buffer has some leftovers from last conversion - some char
774 sequence that doesn't represent a full code point. Length argument should be calculated with
775 readerinput_get_convlen(), if it's -1 it will be calculated here. */
776 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
778 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
781 len
= readerinput_get_convlen(readerinput
);
783 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
784 /* everything below cur is lost too */
785 buffer
->written
-= len
+ buffer
->cur
;
786 /* after this point we don't need cur offset really,
787 it's used only to mark where actual data begins when first chunk is read */
791 /* note that raw buffer content is kept */
792 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
794 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
795 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
801 hr
= get_code_page(enc
, &cp
);
802 if (FAILED(hr
)) return;
804 readerinput
->buffer
->code_page
= cp
;
805 len
= readerinput_get_convlen(readerinput
);
807 TRACE("switching to cp %d\n", cp
);
809 /* just copy in this case */
810 if (enc
== XmlEncoding_UTF16
)
812 readerinput_grow(readerinput
, len
);
813 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
814 dest
->written
+= len
*sizeof(WCHAR
);
818 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
819 readerinput_grow(readerinput
, dest_len
);
820 ptr
= (WCHAR
*)dest
->data
;
821 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
823 dest
->written
+= dest_len
*sizeof(WCHAR
);
826 /* shrinks parsed data a buffer begins with */
827 static void reader_shrink(xmlreader
*reader
)
829 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
831 /* avoid to move too often using threshold shrink length */
832 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
834 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
835 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
837 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
841 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
842 It won't attempt to shrink but will grow destination buffer if needed */
843 static HRESULT
reader_more(xmlreader
*reader
)
845 xmlreaderinput
*readerinput
= reader
->input
;
846 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
847 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
848 UINT cp
= readerinput
->buffer
->code_page
;
853 /* get some raw data from stream first */
854 hr
= readerinput_growraw(readerinput
);
855 len
= readerinput_get_convlen(readerinput
);
857 /* just copy for UTF-16 case */
860 readerinput_grow(readerinput
, len
);
861 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
862 dest
->written
+= len
*sizeof(WCHAR
);
866 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
867 readerinput_grow(readerinput
, dest_len
);
868 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
869 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
871 dest
->written
+= dest_len
*sizeof(WCHAR
);
872 /* get rid of processed data */
873 readerinput_shrinkraw(readerinput
, len
);
878 static inline UINT
reader_get_cur(xmlreader
*reader
)
880 return reader
->input
->buffer
->utf16
.cur
;
883 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
885 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
886 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
887 if (!*ptr
) reader_more(reader
);
888 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
891 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
893 const WCHAR
*ptr
= reader_get_ptr(reader
);
894 return strncmpW(str
, ptr
, strlenW(str
));
897 /* moves cursor n WCHARs forward */
898 static void reader_skipn(xmlreader
*reader
, int n
)
900 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
901 const WCHAR
*ptr
= reader_get_ptr(reader
);
903 while (*ptr
++ && n
--)
910 static inline BOOL
is_wchar_space(WCHAR ch
)
912 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
915 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
916 static int reader_skipspaces(xmlreader
*reader
)
918 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
919 const WCHAR
*ptr
= reader_get_ptr(reader
);
920 UINT start
= reader_get_cur(reader
);
922 while (is_wchar_space(*ptr
))
926 else if (*ptr
== '\n')
935 ptr
= reader_get_ptr(reader
);
938 return reader_get_cur(reader
) - start
;
941 /* [26] VersionNum ::= '1.' [0-9]+ */
942 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
944 static const WCHAR onedotW
[] = {'1','.',0};
948 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
950 start
= reader_get_cur(reader
);
952 reader_skipn(reader
, 2);
954 ptr2
= ptr
= reader_get_ptr(reader
);
955 while (*ptr
>= '0' && *ptr
<= '9')
957 reader_skipn(reader
, 1);
958 ptr
= reader_get_ptr(reader
);
961 if (ptr2
== ptr
) return WC_E_DIGIT
;
962 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
963 TRACE("version=%s\n", debug_strval(reader
, val
));
967 /* [25] Eq ::= S? '=' S? */
968 static HRESULT
reader_parse_eq(xmlreader
*reader
)
970 static const WCHAR eqW
[] = {'=',0};
971 reader_skipspaces(reader
);
972 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
974 reader_skipn(reader
, 1);
975 reader_skipspaces(reader
);
979 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
980 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
982 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
986 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
988 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
989 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
991 reader_skipn(reader
, 7);
993 hr
= reader_parse_eq(reader
);
994 if (FAILED(hr
)) return hr
;
996 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
999 reader_skipn(reader
, 1);
1001 hr
= reader_parse_versionnum(reader
, &val
);
1002 if (FAILED(hr
)) return hr
;
1004 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1008 reader_skipn(reader
, 1);
1010 return reader_add_attr(reader
, &name
, &val
);
1013 /* ([A-Za-z0-9._] | '-') */
1014 static inline BOOL
is_wchar_encname(WCHAR ch
)
1016 return ((ch
>= 'A' && ch
<= 'Z') ||
1017 (ch
>= 'a' && ch
<= 'z') ||
1018 (ch
>= '0' && ch
<= '9') ||
1019 (ch
== '.') || (ch
== '_') ||
1023 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1024 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1026 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1030 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1031 return WC_E_ENCNAME
;
1033 val
->start
= reader_get_cur(reader
);
1036 while (is_wchar_encname(*++ptr
))
1040 enc
= parse_encoding_name(start
, len
);
1041 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1045 if (enc
== XmlEncoding_Unknown
)
1046 return WC_E_ENCNAME
;
1048 /* skip encoding name */
1049 reader_skipn(reader
, len
);
1053 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1054 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1056 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1060 if (!reader_skipspaces(reader
)) return S_FALSE
;
1062 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1063 name
.str
= reader_get_ptr(reader
);
1064 name
.start
= reader_get_cur(reader
);
1066 /* skip 'encoding' */
1067 reader_skipn(reader
, 8);
1069 hr
= reader_parse_eq(reader
);
1070 if (FAILED(hr
)) return hr
;
1072 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1075 reader_skipn(reader
, 1);
1077 hr
= reader_parse_encname(reader
, &val
);
1078 if (FAILED(hr
)) return hr
;
1080 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1084 reader_skipn(reader
, 1);
1086 return reader_add_attr(reader
, &name
, &val
);
1089 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1090 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1092 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1093 static const WCHAR yesW
[] = {'y','e','s',0};
1094 static const WCHAR noW
[] = {'n','o',0};
1099 if (!reader_skipspaces(reader
)) return S_FALSE
;
1101 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1102 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1103 /* skip 'standalone' */
1104 reader_skipn(reader
, 10);
1106 hr
= reader_parse_eq(reader
);
1107 if (FAILED(hr
)) return hr
;
1109 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1112 reader_skipn(reader
, 1);
1114 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1115 return WC_E_XMLDECL
;
1117 start
= reader_get_cur(reader
);
1118 /* skip 'yes'|'no' */
1119 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1120 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1121 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1123 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1126 reader_skipn(reader
, 1);
1128 return reader_add_attr(reader
, &name
, &val
);
1131 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1132 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1134 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1135 static const WCHAR declcloseW
[] = {'?','>',0};
1138 /* check if we have "<?xml " */
1139 if (reader_cmp(reader
, xmldeclW
)) return S_FALSE
;
1141 reader_skipn(reader
, 5);
1142 hr
= reader_parse_versioninfo(reader
);
1146 hr
= reader_parse_encdecl(reader
);
1150 hr
= reader_parse_sddecl(reader
);
1154 reader_skipspaces(reader
);
1155 if (reader_cmp(reader
, declcloseW
)) return WC_E_XMLDECL
;
1156 reader_skipn(reader
, 2);
1158 reader_inc_depth(reader
);
1159 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1160 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1161 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1162 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1167 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1168 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1173 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1175 start
= reader
->resume
[XmlReadResume_Body
];
1176 ptr
= reader_get_ptr(reader
);
1181 reader_skipn(reader
, 4);
1182 reader_shrink(reader
);
1183 ptr
= reader_get_ptr(reader
);
1184 start
= reader_get_cur(reader
);
1185 reader
->nodetype
= XmlNodeType_Comment
;
1186 reader
->resume
[XmlReadResume_Body
] = start
;
1187 reader
->resumestate
= XmlReadResumeState_Comment
;
1188 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
1189 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
1190 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1193 /* will exit when there's no more data, it won't attempt to
1194 read more from stream */
1205 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1206 TRACE("%s\n", debug_strval(reader
, &value
));
1208 /* skip rest of markup '->' */
1209 reader_skipn(reader
, 3);
1211 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1212 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1213 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1214 reader
->resume
[XmlReadResume_Body
] = 0;
1215 reader
->resumestate
= XmlReadResumeState_Initial
;
1219 return WC_E_COMMENT
;
1223 reader_skipn(reader
, 1);
1230 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1231 static inline BOOL
is_char(WCHAR ch
)
1233 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1234 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1235 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1236 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1237 (ch
>= 0xe000 && ch
<= 0xfffd);
1240 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1241 static inline BOOL
is_pubchar(WCHAR ch
)
1243 return (ch
== ' ') ||
1244 (ch
>= 'a' && ch
<= 'z') ||
1245 (ch
>= 'A' && ch
<= 'Z') ||
1246 (ch
>= '0' && ch
<= '9') ||
1247 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1248 (ch
== '=') || (ch
== '?') ||
1249 (ch
== '@') || (ch
== '!') ||
1250 (ch
>= '#' && ch
<= '%') || /* #$% */
1251 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1254 static inline BOOL
is_namestartchar(WCHAR ch
)
1256 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1257 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1258 (ch
>= 0xc0 && ch
<= 0xd6) ||
1259 (ch
>= 0xd8 && ch
<= 0xf6) ||
1260 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1261 (ch
>= 0x370 && ch
<= 0x37d) ||
1262 (ch
>= 0x37f && ch
<= 0x1fff) ||
1263 (ch
>= 0x200c && ch
<= 0x200d) ||
1264 (ch
>= 0x2070 && ch
<= 0x218f) ||
1265 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1266 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1267 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1268 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1269 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1270 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1273 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1274 static inline BOOL
is_ncnamechar(WCHAR ch
)
1276 return (ch
>= 'A' && ch
<= 'Z') ||
1277 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1278 (ch
== '-') || (ch
== '.') ||
1279 (ch
>= '0' && ch
<= '9') ||
1281 (ch
>= 0xc0 && ch
<= 0xd6) ||
1282 (ch
>= 0xd8 && ch
<= 0xf6) ||
1283 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1284 (ch
>= 0x300 && ch
<= 0x36f) ||
1285 (ch
>= 0x370 && ch
<= 0x37d) ||
1286 (ch
>= 0x37f && ch
<= 0x1fff) ||
1287 (ch
>= 0x200c && ch
<= 0x200d) ||
1288 (ch
>= 0x203f && ch
<= 0x2040) ||
1289 (ch
>= 0x2070 && ch
<= 0x218f) ||
1290 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1291 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1292 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1293 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1294 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1295 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1298 static inline BOOL
is_namechar(WCHAR ch
)
1300 return (ch
== ':') || is_ncnamechar(ch
);
1303 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1305 /* When we're on attribute always return attribute type, container node type is kept.
1306 Note that container is not necessarily an element, and attribute doesn't mean it's
1307 an attribute in XML spec terms. */
1308 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1311 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1312 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1313 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1314 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1315 [5] Name ::= NameStartChar (NameChar)* */
1316 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1321 if (reader
->resume
[XmlReadResume_Name
])
1323 start
= reader
->resume
[XmlReadResume_Name
];
1324 ptr
= reader_get_ptr(reader
);
1328 ptr
= reader_get_ptr(reader
);
1329 start
= reader_get_cur(reader
);
1330 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1333 while (is_namechar(*ptr
))
1335 reader_skipn(reader
, 1);
1336 ptr
= reader_get_ptr(reader
);
1339 if (is_reader_pending(reader
))
1341 reader
->resume
[XmlReadResume_Name
] = start
;
1345 reader
->resume
[XmlReadResume_Name
] = 0;
1347 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1348 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1353 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1354 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1356 static const WCHAR xmlW
[] = {'x','m','l'};
1357 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1363 hr
= reader_parse_name(reader
, &name
);
1364 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1366 /* now that we got name check for illegal content */
1367 if (strval_eq(reader
, &name
, &xmlval
))
1368 return WC_E_LEADINGXML
;
1370 /* PITarget can't be a qualified name */
1371 ptr
= reader_get_strptr(reader
, &name
);
1372 for (i
= 0; i
< name
.len
; i
++)
1374 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1376 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1381 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1382 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1389 switch (reader
->resumestate
)
1391 case XmlReadResumeState_Initial
:
1393 reader_skipn(reader
, 2);
1394 reader_shrink(reader
);
1395 reader
->resumestate
= XmlReadResumeState_PITarget
;
1396 case XmlReadResumeState_PITarget
:
1397 hr
= reader_parse_pitarget(reader
, &target
);
1398 if (FAILED(hr
)) return hr
;
1399 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1400 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1401 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1402 reader
->resumestate
= XmlReadResumeState_PIBody
;
1403 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1408 start
= reader
->resume
[XmlReadResume_Body
];
1409 ptr
= reader_get_ptr(reader
);
1416 UINT cur
= reader_get_cur(reader
);
1419 /* strip all leading whitespace chars */
1422 ptr
= reader_get_ptr2(reader
, start
);
1423 if (!is_wchar_space(*ptr
)) break;
1427 reader_init_strvalue(start
, cur
-start
, &value
);
1430 reader_skipn(reader
, 2);
1431 TRACE("%s\n", debug_strval(reader
, &value
));
1432 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1433 reader
->resumestate
= XmlReadResumeState_Initial
;
1434 reader
->resume
[XmlReadResume_Body
] = 0;
1435 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1440 reader_skipn(reader
, 1);
1441 ptr
= reader_get_ptr(reader
);
1447 /* This one is used to parse significant whitespace nodes, like in Misc production */
1448 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1450 switch (reader
->resumestate
)
1452 case XmlReadResumeState_Initial
:
1453 reader_shrink(reader
);
1454 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1455 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1456 reader
->nodetype
= XmlNodeType_Whitespace
;
1457 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1458 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1459 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1461 case XmlReadResumeState_Whitespace
:
1466 reader_skipspaces(reader
);
1467 if (is_reader_pending(reader
)) return S_OK
;
1469 start
= reader
->resume
[XmlReadResume_Body
];
1470 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1471 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1472 TRACE("%s\n", debug_strval(reader
, &value
));
1473 reader
->resumestate
= XmlReadResumeState_Initial
;
1482 /* [27] Misc ::= Comment | PI | S */
1483 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1485 HRESULT hr
= S_FALSE
;
1487 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1489 hr
= reader_more(reader
);
1490 if (FAILED(hr
)) return hr
;
1492 /* finish current node */
1493 switch (reader
->resumestate
)
1495 case XmlReadResumeState_PITarget
:
1496 case XmlReadResumeState_PIBody
:
1497 return reader_parse_pi(reader
);
1498 case XmlReadResumeState_Comment
:
1499 return reader_parse_comment(reader
);
1500 case XmlReadResumeState_Whitespace
:
1501 return reader_parse_whitespace(reader
);
1503 ERR("unknown resume state %d\n", reader
->resumestate
);
1509 const WCHAR
*cur
= reader_get_ptr(reader
);
1511 if (is_wchar_space(*cur
))
1512 hr
= reader_parse_whitespace(reader
);
1513 else if (!reader_cmp(reader
, commentW
))
1514 hr
= reader_parse_comment(reader
);
1515 else if (!reader_cmp(reader
, piW
))
1516 hr
= reader_parse_pi(reader
);
1520 if (hr
!= S_FALSE
) return hr
;
1526 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1527 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1529 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1532 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1535 reader_skipn(reader
, 1);
1537 cur
= reader_get_ptr(reader
);
1538 start
= reader_get_cur(reader
);
1539 while (is_char(*cur
) && *cur
!= quote
)
1541 reader_skipn(reader
, 1);
1542 cur
= reader_get_ptr(reader
);
1544 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1545 if (*cur
== quote
) reader_skipn(reader
, 1);
1547 TRACE("%s\n", debug_strval(reader
, literal
));
1551 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1552 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1553 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1555 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1558 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1561 reader_skipn(reader
, 1);
1563 start
= reader_get_cur(reader
);
1564 cur
= reader_get_ptr(reader
);
1565 while (is_pubchar(*cur
) && *cur
!= quote
)
1567 reader_skipn(reader
, 1);
1568 cur
= reader_get_ptr(reader
);
1571 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1572 TRACE("%s\n", debug_strval(reader
, literal
));
1576 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1577 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1579 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1580 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1585 if (reader_cmp(reader
, systemW
))
1587 if (reader_cmp(reader
, publicW
))
1594 reader_skipn(reader
, 6);
1595 cnt
= reader_skipspaces(reader
);
1596 if (!cnt
) return WC_E_WHITESPACE
;
1598 hr
= reader_parse_pub_literal(reader
, &pub
);
1599 if (FAILED(hr
)) return hr
;
1601 reader_init_cstrvalue(publicW
, strlenW(publicW
), &name
);
1602 return reader_add_attr(reader
, &name
, &pub
);
1610 reader_skipn(reader
, 6);
1611 cnt
= reader_skipspaces(reader
);
1612 if (!cnt
) return WC_E_WHITESPACE
;
1614 hr
= reader_parse_sys_literal(reader
, &sys
);
1615 if (FAILED(hr
)) return hr
;
1617 reader_init_cstrvalue(systemW
, strlenW(systemW
), &name
);
1618 return reader_add_attr(reader
, &name
, &sys
);
1624 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1625 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1627 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1632 /* check if we have "<!DOCTYPE" */
1633 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1634 reader_shrink(reader
);
1636 /* DTD processing is not allowed by default */
1637 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1639 reader_skipn(reader
, 9);
1640 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1643 hr
= reader_parse_name(reader
, &name
);
1644 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1646 reader_skipspaces(reader
);
1648 hr
= reader_parse_externalid(reader
);
1649 if (FAILED(hr
)) return hr
;
1651 reader_skipspaces(reader
);
1653 cur
= reader_get_ptr(reader
);
1656 FIXME("internal subset parsing not implemented\n");
1661 reader_skipn(reader
, 1);
1663 reader
->nodetype
= XmlNodeType_DocumentType
;
1664 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1665 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1670 /* [11 NS] LocalPart ::= NCName */
1671 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
)
1676 if (reader
->resume
[XmlReadResume_Local
])
1678 start
= reader
->resume
[XmlReadResume_Local
];
1679 ptr
= reader_get_ptr(reader
);
1683 ptr
= reader_get_ptr(reader
);
1684 start
= reader_get_cur(reader
);
1687 while (is_ncnamechar(*ptr
))
1689 reader_skipn(reader
, 1);
1690 ptr
= reader_get_ptr(reader
);
1693 if (is_reader_pending(reader
))
1695 reader
->resume
[XmlReadResume_Local
] = start
;
1699 reader
->resume
[XmlReadResume_Local
] = 0;
1701 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1706 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1707 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1708 [9 NS] UnprefixedName ::= LocalPart
1709 [10 NS] Prefix ::= NCName */
1710 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1716 if (reader
->resume
[XmlReadResume_Name
])
1718 start
= reader
->resume
[XmlReadResume_Name
];
1719 ptr
= reader_get_ptr(reader
);
1723 ptr
= reader_get_ptr(reader
);
1724 start
= reader_get_cur(reader
);
1725 reader
->resume
[XmlReadResume_Name
] = start
;
1726 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1729 if (reader
->resume
[XmlReadResume_Local
])
1731 hr
= reader_parse_local(reader
, local
);
1732 if (FAILED(hr
)) return hr
;
1734 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1735 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1740 /* skip prefix part */
1741 while (is_ncnamechar(*ptr
))
1743 reader_skipn(reader
, 1);
1744 ptr
= reader_get_ptr(reader
);
1747 if (is_reader_pending(reader
)) return E_PENDING
;
1749 /* got a qualified name */
1752 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1755 reader_skipn(reader
, 1);
1756 hr
= reader_parse_local(reader
, local
);
1757 if (FAILED(hr
)) return hr
;
1761 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
1762 reader_init_strvalue(0, 0, prefix
);
1766 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1769 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
1771 TRACE("ncname %s\n", debug_strval(reader
, local
));
1773 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
1775 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
1778 reader
->resume
[XmlReadResume_Name
] = 0;
1779 reader
->resume
[XmlReadResume_Local
] = 0;
1784 /* Applies normalization rules to a single char, used for attribute values.
1786 Rules include 2 steps:
1788 1) replacing \r\n with a single \n;
1789 2) replacing all whitespace chars with ' '.
1792 static void reader_normalize_space(xmlreader
*reader
, WCHAR
*ptr
)
1794 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1796 if (!is_wchar_space(*ptr
)) return;
1798 if (*ptr
== '\r' && *(ptr
+1) == '\n')
1800 int len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - 2*sizeof(WCHAR
);
1801 memmove(ptr
+1, ptr
+2, len
);
1806 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
1808 static const WCHAR entltW
[] = {'l','t'};
1809 static const WCHAR entgtW
[] = {'g','t'};
1810 static const WCHAR entampW
[] = {'a','m','p'};
1811 static const WCHAR entaposW
[] = {'a','p','o','s'};
1812 static const WCHAR entquotW
[] = {'q','u','o','t'};
1813 static const strval lt
= { (WCHAR
*)entltW
, 2 };
1814 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
1815 static const strval amp
= { (WCHAR
*)entampW
, 3 };
1816 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
1817 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
1818 WCHAR
*str
= reader_get_strptr(reader
, name
);
1823 if (strval_eq(reader
, name
, <
)) return '<';
1826 if (strval_eq(reader
, name
, >
)) return '>';
1829 if (strval_eq(reader
, name
, &
))
1831 else if (strval_eq(reader
, name
, &apos
))
1835 if (strval_eq(reader
, name
, "
)) return '\"';
1844 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1845 [67] Reference ::= EntityRef | CharRef
1846 [68] EntityRef ::= '&' Name ';' */
1847 static HRESULT
reader_parse_reference(xmlreader
*reader
)
1849 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1850 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1851 UINT cur
= reader_get_cur(reader
);
1856 reader_skipn(reader
, 1);
1857 ptr
= reader_get_ptr(reader
);
1861 reader_skipn(reader
, 1);
1862 ptr
= reader_get_ptr(reader
);
1864 /* hex char or decimal */
1867 reader_skipn(reader
, 1);
1868 ptr
= reader_get_ptr(reader
);
1872 if ((*ptr
>= '0' && *ptr
<= '9'))
1873 ch
= ch
*16 + *ptr
- '0';
1874 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
1875 ch
= ch
*16 + *ptr
- 'a' + 10;
1876 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
1877 ch
= ch
*16 + *ptr
- 'A' + 10;
1879 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
1880 reader_skipn(reader
, 1);
1881 ptr
= reader_get_ptr(reader
);
1888 if ((*ptr
>= '0' && *ptr
<= '9'))
1890 ch
= ch
*10 + *ptr
- '0';
1891 reader_skipn(reader
, 1);
1892 ptr
= reader_get_ptr(reader
);
1895 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
1899 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
1902 if (is_wchar_space(ch
)) ch
= ' ';
1904 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1905 memmove(start
+1, ptr
+1, len
);
1906 buffer
->cur
= cur
+ 1;
1915 hr
= reader_parse_name(reader
, &name
);
1916 if (FAILED(hr
)) return hr
;
1918 ptr
= reader_get_ptr(reader
);
1919 if (*ptr
!= ';') return WC_E_SEMICOLON
;
1921 /* predefined entities resolve to a single character */
1922 ch
= get_predefined_entity(reader
, &name
);
1925 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
1926 memmove(start
+1, ptr
+1, len
);
1927 buffer
->cur
= cur
+ 1;
1933 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
1934 return WC_E_UNDECLAREDENTITY
;
1942 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
1943 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
1948 ptr
= reader_get_ptr(reader
);
1950 /* skip opening quote */
1952 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
1953 reader_skipn(reader
, 1);
1955 ptr
= reader_get_ptr(reader
);
1956 start
= reader_get_cur(reader
);
1959 if (*ptr
== '<') return WC_E_LESSTHAN
;
1963 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
1964 /* skip closing quote */
1965 reader_skipn(reader
, 1);
1971 HRESULT hr
= reader_parse_reference(reader
);
1972 if (FAILED(hr
)) return hr
;
1976 reader_normalize_space(reader
, ptr
);
1977 reader_skipn(reader
, 1);
1979 ptr
= reader_get_ptr(reader
);
1985 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
1986 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
1987 [3 NS] DefaultAttName ::= 'xmlns'
1988 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
1989 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
1991 static const WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
1992 strval prefix
, local
, qname
, xmlns
, value
;
1995 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
1996 if (FAILED(hr
)) return hr
;
1998 reader_init_cstrvalue((WCHAR
*)xmlnsW
, 5, &xmlns
);
2000 if (strval_eq(reader
, &prefix
, &xmlns
))
2002 FIXME("namespace definitions not supported\n");
2006 if (strval_eq(reader
, &qname
, &xmlns
))
2007 FIXME("default namespace definitions not supported\n");
2009 hr
= reader_parse_eq(reader
);
2010 if (FAILED(hr
)) return hr
;
2012 hr
= reader_parse_attvalue(reader
, &value
);
2013 if (FAILED(hr
)) return hr
;
2015 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2016 return reader_add_attr(reader
, &local
, &value
);
2019 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2020 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2021 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
, int *empty
)
2025 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2026 if (FAILED(hr
)) return hr
;
2030 static const WCHAR endW
[] = {'/','>',0};
2032 reader_skipspaces(reader
);
2035 if ((*empty
= !reader_cmp(reader
, endW
)))
2038 reader_skipn(reader
, 2);
2039 reader
->empty_element
= TRUE
;
2043 /* got a start tag */
2044 if (!reader_cmp(reader
, gtW
))
2047 reader_skipn(reader
, 1);
2048 return reader_push_element(reader
, qname
, local
);
2051 hr
= reader_parse_attribute(reader
);
2052 if (FAILED(hr
)) return hr
;
2058 /* [39] element ::= EmptyElemTag | STag content ETag */
2059 static HRESULT
reader_parse_element(xmlreader
*reader
)
2063 switch (reader
->resumestate
)
2065 case XmlReadResumeState_Initial
:
2066 /* check if we are really on element */
2067 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2070 reader_skipn(reader
, 1);
2072 reader_shrink(reader
);
2073 reader
->resumestate
= XmlReadResumeState_STag
;
2074 case XmlReadResumeState_STag
:
2076 strval qname
, prefix
, local
;
2079 /* this handles empty elements too */
2080 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
, &empty
);
2081 if (FAILED(hr
)) return hr
;
2083 /* FIXME: need to check for defined namespace to reject invalid prefix,
2084 currently reject all prefixes */
2085 if (prefix
.len
) return NC_E_UNDECLAREDPREFIX
;
2087 /* if we got empty element and stack is empty go straight to Misc */
2088 if (empty
&& list_empty(&reader
->elements
))
2089 reader
->instate
= XmlReadInState_MiscEnd
;
2091 reader
->instate
= XmlReadInState_Content
;
2093 reader
->nodetype
= XmlNodeType_Element
;
2094 reader
->resumestate
= XmlReadResumeState_Initial
;
2095 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2096 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2097 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2107 /* [13 NS] ETag ::= '</' QName S? '>' */
2108 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2110 strval prefix
, local
, qname
;
2111 struct element
*elem
;
2115 reader_skipn(reader
, 2);
2117 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2118 if (FAILED(hr
)) return hr
;
2120 reader_skipspaces(reader
);
2122 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2125 reader_skipn(reader
, 1);
2127 /* Element stack should never be empty at this point, cause we shouldn't get to
2128 content parsing if it's empty. */
2129 elem
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2130 if (!strval_eq(reader
, &elem
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2132 reader_pop_element(reader
);
2134 /* It was a root element, the rest is expected as Misc */
2135 if (list_empty(&reader
->elements
))
2136 reader
->instate
= XmlReadInState_MiscEnd
;
2138 reader
->nodetype
= XmlNodeType_EndElement
;
2139 reader_set_strvalue(reader
, StringValue_LocalName
, &local
);
2140 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2145 /* [18] CDSect ::= CDStart CData CDEnd
2146 [19] CDStart ::= '<![CDATA['
2147 [20] CData ::= (Char* - (Char* ']]>' Char*))
2148 [21] CDEnd ::= ']]>' */
2149 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2154 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2156 start
= reader
->resume
[XmlReadResume_Body
];
2157 ptr
= reader_get_ptr(reader
);
2161 /* skip markup '<![CDATA[' */
2162 reader_skipn(reader
, 9);
2163 reader_shrink(reader
);
2164 ptr
= reader_get_ptr(reader
);
2165 start
= reader_get_cur(reader
);
2166 reader
->nodetype
= XmlNodeType_CDATA
;
2167 reader
->resume
[XmlReadResume_Body
] = start
;
2168 reader
->resumestate
= XmlReadResumeState_CDATA
;
2169 reader_set_strvalue(reader
, StringValue_LocalName
, NULL
);
2170 reader_set_strvalue(reader
, StringValue_QualifiedName
, NULL
);
2171 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2176 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2180 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2183 reader_skipn(reader
, 3);
2184 TRACE("%s\n", debug_strval(reader
, &value
));
2186 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2187 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2188 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2189 reader
->resume
[XmlReadResume_Body
] = 0;
2190 reader
->resumestate
= XmlReadResumeState_Initial
;
2195 /* Value normalization is not fully implemented, rules are:
2197 - single '\r' -> '\n';
2198 - sequence '\r\n' -> '\n', in this case value length changes;
2200 if (*ptr
== '\r') *ptr
= '\n';
2201 reader_skipn(reader
, 1);
2209 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2210 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2215 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2217 start
= reader
->resume
[XmlReadResume_Body
];
2218 ptr
= reader_get_ptr(reader
);
2222 reader_shrink(reader
);
2223 ptr
= reader_get_ptr(reader
);
2224 start
= reader_get_cur(reader
);
2225 /* There's no text */
2226 if (!*ptr
|| *ptr
== '<') return S_OK
;
2227 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2228 reader
->resume
[XmlReadResume_Body
] = start
;
2229 reader
->resumestate
= XmlReadResumeState_CharData
;
2230 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
2231 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
2232 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2237 /* CDATA closing sequence ']]>' is not allowed */
2238 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2239 return WC_E_CDSECTEND
;
2241 /* Found next markup part */
2246 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2247 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2248 reader
->resume
[XmlReadResume_Body
] = 0;
2249 reader
->resumestate
= XmlReadResumeState_Initial
;
2253 reader_skipn(reader
, 1);
2255 /* this covers a case when text has leading whitespace chars */
2256 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2263 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2264 static HRESULT
reader_parse_content(xmlreader
*reader
)
2266 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2267 static const WCHAR etagW
[] = {'<','/',0};
2268 static const WCHAR ampW
[] = {'&',0};
2270 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2272 switch (reader
->resumestate
)
2274 case XmlReadResumeState_CDATA
:
2275 return reader_parse_cdata(reader
);
2276 case XmlReadResumeState_Comment
:
2277 return reader_parse_comment(reader
);
2278 case XmlReadResumeState_PIBody
:
2279 case XmlReadResumeState_PITarget
:
2280 return reader_parse_pi(reader
);
2281 case XmlReadResumeState_CharData
:
2282 return reader_parse_chardata(reader
);
2284 ERR("unknown resume state %d\n", reader
->resumestate
);
2288 reader_shrink(reader
);
2290 /* handle end tag here, it indicates end of content as well */
2291 if (!reader_cmp(reader
, etagW
))
2292 return reader_parse_endtag(reader
);
2294 if (!reader_cmp(reader
, commentW
))
2295 return reader_parse_comment(reader
);
2297 if (!reader_cmp(reader
, piW
))
2298 return reader_parse_pi(reader
);
2300 if (!reader_cmp(reader
, cdstartW
))
2301 return reader_parse_cdata(reader
);
2303 if (!reader_cmp(reader
, ampW
))
2304 return reader_parse_reference(reader
);
2306 if (!reader_cmp(reader
, ltW
))
2307 return reader_parse_element(reader
);
2309 /* what's left must be CharData */
2310 return reader_parse_chardata(reader
);
2313 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2317 if (!is_reader_pending(reader
))
2318 reader_clear_attrs(reader
);
2322 switch (reader
->instate
)
2324 /* if it's a first call for a new input we need to detect stream encoding */
2325 case XmlReadInState_Initial
:
2329 hr
= readerinput_growraw(reader
->input
);
2330 if (FAILED(hr
)) return hr
;
2332 /* try to detect encoding by BOM or data and set input code page */
2333 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2334 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2335 if (FAILED(hr
)) return hr
;
2337 /* always switch first time cause we have to put something in */
2338 readerinput_switchencoding(reader
->input
, enc
);
2340 /* parse xml declaration */
2341 hr
= reader_parse_xmldecl(reader
);
2342 if (FAILED(hr
)) return hr
;
2344 readerinput_shrinkraw(reader
->input
, -1);
2345 reader
->instate
= XmlReadInState_Misc_DTD
;
2346 if (hr
== S_OK
) return hr
;
2349 case XmlReadInState_Misc_DTD
:
2350 hr
= reader_parse_misc(reader
);
2351 if (FAILED(hr
)) return hr
;
2354 reader
->instate
= XmlReadInState_DTD
;
2358 case XmlReadInState_DTD
:
2359 hr
= reader_parse_dtd(reader
);
2360 if (FAILED(hr
)) return hr
;
2364 reader
->instate
= XmlReadInState_DTD_Misc
;
2368 reader
->instate
= XmlReadInState_Element
;
2370 case XmlReadInState_DTD_Misc
:
2371 hr
= reader_parse_misc(reader
);
2372 if (FAILED(hr
)) return hr
;
2375 reader
->instate
= XmlReadInState_Element
;
2379 case XmlReadInState_Element
:
2380 return reader_parse_element(reader
);
2381 case XmlReadInState_Content
:
2382 return reader_parse_content(reader
);
2383 case XmlReadInState_MiscEnd
:
2384 hr
= reader_parse_misc(reader
);
2385 if (FAILED(hr
)) return hr
;
2388 reader
->instate
= XmlReadInState_Eof
;
2390 case XmlReadInState_Eof
:
2393 FIXME("internal state %d not handled\n", reader
->instate
);
2401 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2403 xmlreader
*This
= impl_from_IXmlReader(iface
);
2405 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2407 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2408 IsEqualGUID(riid
, &IID_IXmlReader
))
2414 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2416 return E_NOINTERFACE
;
2419 IXmlReader_AddRef(iface
);
2424 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2426 xmlreader
*This
= impl_from_IXmlReader(iface
);
2427 ULONG ref
= InterlockedIncrement(&This
->ref
);
2428 TRACE("(%p)->(%d)\n", This
, ref
);
2432 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2434 xmlreader
*This
= impl_from_IXmlReader(iface
);
2435 LONG ref
= InterlockedDecrement(&This
->ref
);
2437 TRACE("(%p)->(%d)\n", This
, ref
);
2441 IMalloc
*imalloc
= This
->imalloc
;
2442 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2443 reader_clear_attrs(This
);
2444 reader_clear_elements(This
);
2445 reader_free_strvalues(This
);
2446 reader_free(This
, This
);
2447 if (imalloc
) IMalloc_Release(imalloc
);
2453 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2455 xmlreader
*This
= impl_from_IXmlReader(iface
);
2456 IXmlReaderInput
*readerinput
;
2459 TRACE("(%p)->(%p)\n", This
, input
);
2463 readerinput_release_stream(This
->input
);
2464 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2468 This
->line
= This
->pos
= 0;
2469 reader_clear_elements(This
);
2471 This
->resumestate
= XmlReadResumeState_Initial
;
2472 memset(This
->resume
, 0, sizeof(This
->resume
));
2474 /* just reset current input */
2477 This
->state
= XmlReadState_Initial
;
2481 /* now try IXmlReaderInput, ISequentialStream, IStream */
2482 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2485 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2486 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2489 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2490 readerinput
, readerinput
->lpVtbl
);
2491 IUnknown_Release(readerinput
);
2497 if (hr
!= S_OK
|| !readerinput
)
2499 /* create IXmlReaderInput basing on supplied interface */
2500 hr
= CreateXmlReaderInputWithEncodingName(input
,
2501 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2502 if (hr
!= S_OK
) return hr
;
2503 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2506 /* set stream for supplied IXmlReaderInput */
2507 hr
= readerinput_query_for_stream(This
->input
);
2510 This
->state
= XmlReadState_Initial
;
2511 This
->instate
= XmlReadInState_Initial
;
2517 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2519 xmlreader
*This
= impl_from_IXmlReader(iface
);
2521 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2523 if (!value
) return E_INVALIDARG
;
2527 case XmlReaderProperty_DtdProcessing
:
2528 *value
= This
->dtdmode
;
2530 case XmlReaderProperty_ReadState
:
2531 *value
= This
->state
;
2534 FIXME("Unimplemented property (%u)\n", property
);
2541 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2543 xmlreader
*This
= impl_from_IXmlReader(iface
);
2545 TRACE("(%p)->(%s %lu)\n", This
, debugstr_reader_prop(property
), value
);
2549 case XmlReaderProperty_DtdProcessing
:
2550 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2551 This
->dtdmode
= value
;
2554 FIXME("Unimplemented property (%u)\n", property
);
2561 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2563 xmlreader
*This
= impl_from_IXmlReader(iface
);
2564 XmlNodeType oldtype
= This
->nodetype
;
2567 TRACE("(%p)->(%p)\n", This
, nodetype
);
2569 if (This
->state
== XmlReadState_Closed
) return S_FALSE
;
2571 hr
= reader_parse_nextnode(This
);
2572 if (oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2573 This
->state
= XmlReadState_Interactive
;
2576 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2577 *nodetype
= This
->nodetype
;
2583 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2585 xmlreader
*This
= impl_from_IXmlReader(iface
);
2586 TRACE("(%p)->(%p)\n", This
, node_type
);
2588 *node_type
= reader_get_nodetype(This
);
2589 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2592 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2594 xmlreader
*This
= impl_from_IXmlReader(iface
);
2596 TRACE("(%p)\n", This
);
2598 if (!This
->attr_count
) return S_FALSE
;
2599 This
->attr
= LIST_ENTRY(list_head(&This
->attrs
), struct attribute
, entry
);
2600 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2601 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
/* IXmlReader::MoveToNextAttribute implementation.
 *
 * Returns S_FALSE when the node has no attributes.  One path delegates to
 * IXmlReader_MoveToFirstAttribute(); otherwise the list entry following the
 * current attribute is looked up with list_next(), made current, and the
 * local name and value string slots are updated from it.  The result is
 * S_OK when a following entry exists and S_FALSE when it does not.
 * NOTE(review): the LIST_ENTRY()/reader_set_strvalue() lines must only run
 * when next is non-NULL - confirm the guard around them.
 */
2606 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2608 xmlreader
*This
= impl_from_IXmlReader(iface
);
2609 const struct list
*next
;
2611 TRACE("(%p)\n", This
);
2613 if (!This
->attr_count
) return S_FALSE
;
2616 return IXmlReader_MoveToFirstAttribute(iface
);
/* entry after the current attribute */
2618 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2621 This
->attr
= LIST_ENTRY(next
, struct attribute
, entry
);
2622 reader_set_strvalue(This
, StringValue_LocalName
, &This
->attr
->localname
);
2623 reader_set_strvalue(This
, StringValue_Value
, &This
->attr
->value
);
2626 return next
? S_OK
: S_FALSE
;
/* IXmlReader::MoveToAttributeByName: not implemented, the visible code only
 * logs a FIXME with the interface pointer and both name arguments. */
2629 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
2631 LPCWSTR namespaceUri
)
2633 FIXME("(%p %p %p): stub\n", iface
, local_name
, namespaceUri
);
/* IXmlReader::MoveToElement implementation.
 *
 * Returns S_FALSE when the node has no attributes.  Otherwise the head of
 * the element list is taken as the owning element and the qualified name and
 * local name string slots are set from it.
 */
2637 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
2639 xmlreader
*This
= impl_from_IXmlReader(iface
);
2640 struct element
*elem
;
2642 TRACE("(%p)\n", This
);
2644 if (!This
->attr_count
) return S_FALSE
;
2647 /* FIXME: support other node types with 'attributes' like DTD */
2648 elem
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
2651 reader_set_strvalue(This
, StringValue_QualifiedName
, &elem
->qname
);
2652 reader_set_strvalue(This
, StringValue_LocalName
, &elem
->localname
);
2658 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2660 xmlreader
*This
= impl_from_IXmlReader(iface
);
2662 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2663 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
2664 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
/* IXmlReader::GetNamespaceUri: not implemented, the visible code only logs a
 * FIXME with the interface pointer and both output arguments. */
2668 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
,
2669 LPCWSTR
*namespaceUri
,
2670 UINT
*namespaceUri_length
)
2672 FIXME("(%p %p %p): stub\n", iface
, namespaceUri
, namespaceUri_length
);
2676 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
2678 xmlreader
*This
= impl_from_IXmlReader(iface
);
2680 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
2681 *name
= This
->strvalues
[StringValue_LocalName
].str
;
2682 if (len
) *len
= This
->strvalues
[StringValue_LocalName
].len
;
2686 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, LPCWSTR
*prefix
, UINT
*len
)
2688 xmlreader
*This
= impl_from_IXmlReader(iface
);
2690 TRACE("(%p)->(%p %p)\n", This
, prefix
, len
);
2691 *prefix
= This
->strvalues
[StringValue_Prefix
].str
;
2692 if (len
) *len
= This
->strvalues
[StringValue_Prefix
].len
;
/* IXmlReader::GetValue implementation.
 *
 * Works on the StringValue_Value slot of reader->strvalues.  If the current
 * node is a comment whose value has no string pointer yet, or the reader is
 * pending, IXmlReader_Read() is called first; a failing HRESULT is returned
 * unchanged and E_PENDING is returned while the reader is still pending.
 * The value text is copied into a buffer obtained from reader_alloc() that
 * holds val->len characters plus one extra WCHAR (E_OUTOFMEMORY when the
 * allocation fails).  The length is stored through len only when len is
 * non-NULL.
 */
2696 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
2698 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2699 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2701 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
2705 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
) || is_reader_pending(reader
))
2710 hr
= IXmlReader_Read(iface
, &type
);
2711 if (FAILED(hr
)) return hr
;
2713 /* return if still pending, partially read values are not reported */
2714 if (is_reader_pending(reader
)) return E_PENDING
;
/* room for the value characters and one more WCHAR */
2719 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
2720 if (!ptr
) return E_OUTOFMEMORY
;
2721 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
2727 if (len
) *len
= val
->len
;
2731 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
2733 xmlreader
*reader
= impl_from_IXmlReader(iface
);
2734 strval
*val
= &reader
->strvalues
[StringValue_Value
];
2737 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
2739 /* Value is already allocated, chunked reads are not possible. */
2740 if (val
->str
) return S_FALSE
;
2744 len
= min(chunk_size
, val
->len
);
2745 memcpy(buffer
, reader_get_ptr2(reader
, val
->start
), len
);
2748 if (read
) *read
= len
;
/* IXmlReader::GetBaseUri: not implemented, the visible code only logs a
 * FIXME with the interface pointer and both output arguments. */
2754 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
2756 UINT
*baseUri_length
)
2758 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
/* IXmlReader::IsDefault: not implemented, the visible code only logs a FIXME
 * with the interface pointer. */
2762 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
2764 FIXME("(%p): stub\n", iface
);
2768 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
2770 xmlreader
*This
= impl_from_IXmlReader(iface
);
2771 TRACE("(%p)\n", This
);
2772 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
2773 when current node is start tag of an element */
2774 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->empty_element
: FALSE
;
2777 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*lineNumber
)
2779 xmlreader
*This
= impl_from_IXmlReader(iface
);
2781 TRACE("(%p %p)\n", This
, lineNumber
);
2783 if (!lineNumber
) return E_INVALIDARG
;
2785 *lineNumber
= This
->line
;
2790 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*linePosition
)
2792 xmlreader
*This
= impl_from_IXmlReader(iface
);
2794 TRACE("(%p %p)\n", This
, linePosition
);
2796 if (!linePosition
) return E_INVALIDARG
;
2798 *linePosition
= This
->pos
;
2803 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
2805 xmlreader
*This
= impl_from_IXmlReader(iface
);
2807 TRACE("(%p)->(%p)\n", This
, count
);
2809 if (!count
) return E_INVALIDARG
;
2811 *count
= This
->attr_count
;
2815 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
2817 xmlreader
*This
= impl_from_IXmlReader(iface
);
2818 TRACE("(%p)->(%p)\n", This
, depth
);
2819 *depth
= This
->depth
;
/* IXmlReader::IsEOF: not implemented, the visible code only logs a FIXME
 * with the interface pointer. */
2823 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
2825 FIXME("(%p): stub\n", iface
);
/* Method table wiring the xmlreader_* functions into IXmlReader.  The
 * initializers are positional, so their order has to follow the member order
 * of struct IXmlReaderVtbl. */
2829 static const struct IXmlReaderVtbl xmlreader_vtbl
=
2831 xmlreader_QueryInterface
,
2835 xmlreader_GetProperty
,
2836 xmlreader_SetProperty
,
2838 xmlreader_GetNodeType
,
2839 xmlreader_MoveToFirstAttribute
,
2840 xmlreader_MoveToNextAttribute
,
2841 xmlreader_MoveToAttributeByName
,
2842 xmlreader_MoveToElement
,
2843 xmlreader_GetQualifiedName
,
2844 xmlreader_GetNamespaceUri
,
2845 xmlreader_GetLocalName
,
2846 xmlreader_GetPrefix
,
2848 xmlreader_ReadValueChunk
,
2849 xmlreader_GetBaseUri
,
2850 xmlreader_IsDefault
,
2851 xmlreader_IsEmptyElement
,
2852 xmlreader_GetLineNumber
,
2853 xmlreader_GetLinePosition
,
2854 xmlreader_GetAttributeCount
,
2859 /** IXmlReaderInput **/
/* IUnknown::QueryInterface for the reader input object.
 *
 * IID_IXmlReaderInput and IID_IUnknown are the interfaces tested for; any
 * other IID is reported with a WARN and answered with E_NOINTERFACE.  The
 * object is AddRef'ed via IUnknown_AddRef() on the path that does not bail
 * out.
 */
2860 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
2862 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2864 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2866 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
2867 IsEqualGUID(riid
, &IID_IUnknown
))
2873 WARN("interface %s not implemented\n", debugstr_guid(riid
));
2875 return E_NOINTERFACE
;
2878 IUnknown_AddRef(iface
);
2883 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
2885 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2886 ULONG ref
= InterlockedIncrement(&This
->ref
);
2887 TRACE("(%p)->(%d)\n", This
, ref
);
/* IUnknown::Release for the reader input object.
 *
 * Atomically decrements the reference count.  The teardown sequence below
 * (presumably entered when the count drops to zero - confirm) releases the
 * wrapped input and stream, frees the input buffer and the base URI copy,
 * and finally frees the object itself.
 */
2891 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
2893 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
2894 LONG ref
= InterlockedDecrement(&This
->ref
);
2896 TRACE("(%p)->(%d)\n", This
, ref
);
/* keep the allocator in a local: This is freed before it is released */
2900 IMalloc
*imalloc
= This
->imalloc
;
2901 if (This
->input
) IUnknown_Release(This
->input
);
2902 if (This
->stream
) ISequentialStream_Release(This
->stream
);
2903 if (This
->buffer
) free_input_buffer(This
->buffer
);
2904 readerinput_free(This
, This
->baseuri
);
2905 readerinput_free(This
, This
);
2906 if (imalloc
) IMalloc_Release(imalloc
);
/* IUnknown method table of the reader input object; entries are positional
 * (QueryInterface, AddRef, Release). */
2912 static const struct IUnknownVtbl xmlreaderinputvtbl
=
2914 xmlreaderinput_QueryInterface
,
2915 xmlreaderinput_AddRef
,
2916 xmlreaderinput_Release
/* Creates a reader object and returns its IXmlReader interface through obj.
 *
 * riid    - requested interface; anything but IID_IXmlReader is reported
 *           with an ERR.
 * obj     - receives the IXmlReader interface pointer.
 * imalloc - optional allocator; it is AddRef'ed and stored in the reader when
 *           non-NULL.
 *
 * The object is obtained from IMalloc_Alloc() or heap_alloc() (presumably
 * depending on whether imalloc was supplied - confirm) and E_OUTOFMEMORY is
 * returned when the allocation fails.  A new reader starts without input, in
 * XmlReadState_Closed, with DTD processing prohibited, a maximum depth of
 * 256, empty attribute and element lists and all string values set to
 * strval_empty.
 */
2919 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
2924 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
2926 if (!IsEqualGUID(riid
, &IID_IXmlReader
))
2928 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid
));
2933 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
2935 reader
= heap_alloc(sizeof(*reader
));
2936 if(!reader
) return E_OUTOFMEMORY
;
2938 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
2940 reader
->input
= NULL
;
2941 reader
->state
= XmlReadState_Closed
;
2942 reader
->instate
= XmlReadInState_Initial
;
2943 reader
->resumestate
= XmlReadResumeState_Initial
;
2944 reader
->dtdmode
= DtdProcessing_Prohibit
;
2945 reader
->line
= reader
->pos
= 0;
2946 reader
->imalloc
= imalloc
;
/* the reader keeps its own reference on the allocator */
2947 if (imalloc
) IMalloc_AddRef(imalloc
);
2948 reader
->nodetype
= XmlNodeType_None
;
2949 list_init(&reader
->attrs
);
2950 reader
->attr_count
= 0;
2951 reader
->attr
= NULL
;
2952 list_init(&reader
->elements
);
2954 reader
->max_depth
= 256;
2955 reader
->empty_element
= FALSE
;
2956 memset(reader
->resume
, 0, sizeof(reader
->resume
));
/* every string value slot starts out empty */
2958 for (i
= 0; i
< StringValue_Last
; i
++)
2959 reader
->strvalues
[i
] = strval_empty
;
2961 *obj
= &reader
->IXmlReader_iface
;
2963 TRACE("returning iface %p\n", *obj
);
/* Creates a reader input object wrapping stream and returns it via ppInput.
 *
 * E_INVALIDARG is returned when stream or ppInput is NULL, E_OUTOFMEMORY
 * when the object cannot be allocated (IMalloc_Alloc() or heap_alloc()).
 * The new object starts with a reference count of one, keeps a reference on
 * imalloc when one is given, records the encoding parsed from the encoding
 * name, the hint and a copy of base_uri, and gets its buffer from
 * alloc_input_buffer().  The cleanup sequence after that call frees the base
 * URI copy and the object and drops the allocator reference.  Finally the
 * stream is queried for IUnknown and the result kept in readerinput->input.
 */
2968 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
2973 IXmlReaderInput
**ppInput
)
2975 xmlreaderinput
*readerinput
;
2978 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
2979 hint
, wine_dbgstr_w(base_uri
), ppInput
);
2981 if (!stream
|| !ppInput
) return E_INVALIDARG
;
2984 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
2986 readerinput
= heap_alloc(sizeof(*readerinput
));
2987 if(!readerinput
) return E_OUTOFMEMORY
;
2989 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
2990 readerinput
->ref
= 1;
2991 readerinput
->imalloc
= imalloc
;
2992 readerinput
->stream
= NULL
;
2993 if (imalloc
) IMalloc_AddRef(imalloc
);
2994 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
2995 readerinput
->hint
= hint
;
2996 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
2997 readerinput
->pending
= 0;
2999 hr
= alloc_input_buffer(readerinput
);
/* undo the allocations and the allocator reference taken above */
3002 readerinput_free(readerinput
, readerinput
->baseuri
);
3003 readerinput_free(readerinput
, readerinput
);
3004 if (imalloc
) IMalloc_Release(imalloc
);
3007 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3009 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3011 TRACE("returning iface %p\n", *ppInput
);