[XMLLITE] Sync with Wine Staging 4.18. CORE-16441
[reactos.git] / dll / win32 / xmllite / reader.c
1 /*
2 * IXmlReader implementation
3 *
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #define COBJMACROS
22
23 #include <stdio.h>
24 #include <stdarg.h>
25 #include <assert.h>
26 #include "windef.h"
27 #include "winbase.h"
28 #include "initguid.h"
29 #include "objbase.h"
30 #include "xmllite.h"
31 #include "xmllite_private.h"
32 #ifdef __REACTOS__
33 #include <winnls.h>
34 #endif
35
36 #include "wine/debug.h"
37 #include "wine/list.h"
38
39 WINE_DEFAULT_DEBUG_CHANNEL(xmllite);
40
41 /* not defined in public headers */
42 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43
/* Internal parser state, stored in xmlreader.instate. */
typedef enum
{
    XmlReadInState_Initial,
    XmlReadInState_XmlDecl,
    XmlReadInState_Misc_DTD,
    XmlReadInState_DTD,
    XmlReadInState_DTD_Misc,
    XmlReadInState_Element,
    XmlReadInState_Content,
    XmlReadInState_MiscEnd,     /* optional Misc at the end of a document */
    XmlReadInState_Eof
} XmlReaderInternalState;
56
/* Records the construct whose parsing was interrupted by an input problem,
   so that the reader can resume parsing from there (xmlreader.resumestate). */
typedef enum
{
    XmlReadResumeState_Initial,
    XmlReadResumeState_PITarget,
    XmlReadResumeState_PIBody,
    XmlReadResumeState_CDATA,
    XmlReadResumeState_Comment,
    XmlReadResumeState_STag,
    XmlReadResumeState_CharData,
    XmlReadResumeState_Whitespace
} XmlReaderResumeState;
70
/* Index into xmlreader.resume[]: saved input offsets used to resume from a
   particular input position. */
typedef enum
{
    XmlReadResume_Name,     /* PITarget, name for NCName, prefix for QName */
    XmlReadResume_Local,    /* local for QName */
    XmlReadResume_Body,     /* PI body, comment text, CDATA text, CharData text */
    XmlReadResume_Last      /* number of slots, not a slot itself */
} XmlReaderResume;
79
/* Index into xmlreader.strvalues[]: the string values kept for the current node. */
typedef enum
{
    StringValue_LocalName,
    StringValue_Prefix,
    StringValue_QualifiedName,
    StringValue_Value,
    StringValue_Last        /* number of slots, not a slot itself */
} XmlReaderStringValue;
88
89 static const WCHAR usasciiW[] = {'U','S','-','A','S','C','I','I',0};
90 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
91 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
92
93 static const WCHAR dblquoteW[] = {'\"',0};
94 static const WCHAR quoteW[] = {'\'',0};
95 static const WCHAR ltW[] = {'<',0};
96 static const WCHAR gtW[] = {'>',0};
97 static const WCHAR commentW[] = {'<','!','-','-',0};
98 static const WCHAR piW[] = {'<','?',0};
99
100 BOOL is_namestartchar(WCHAR ch);
101
102 static const char *debugstr_nodetype(XmlNodeType nodetype)
103 {
104 static const char * const type_names[] =
105 {
106 "None",
107 "Element",
108 "Attribute",
109 "Text",
110 "CDATA",
111 "",
112 "",
113 "ProcessingInstruction",
114 "Comment",
115 "",
116 "DocumentType",
117 "",
118 "",
119 "Whitespace",
120 "",
121 "EndElement",
122 "",
123 "XmlDeclaration"
124 };
125
126 if (nodetype > _XmlNodeType_Last)
127 return wine_dbg_sprintf("unknown type=%d", nodetype);
128
129 return type_names[nodetype];
130 }
131
132 static const char *debugstr_reader_prop(XmlReaderProperty prop)
133 {
134 static const char * const prop_names[] =
135 {
136 "MultiLanguage",
137 "ConformanceLevel",
138 "RandomAccess",
139 "XmlResolver",
140 "DtdProcessing",
141 "ReadState",
142 "MaxElementDepth",
143 "MaxEntityExpansion"
144 };
145
146 if (prop > _XmlReaderProperty_Last)
147 return wine_dbg_sprintf("unknown property=%d", prop);
148
149 return prop_names[prop];
150 }
151
152 struct xml_encoding_data
153 {
154 const WCHAR *name;
155 xml_encoding enc;
156 UINT cp;
157 };
158
159 static const struct xml_encoding_data xml_encoding_map[] = {
160 { usasciiW, XmlEncoding_USASCII, 20127 },
161 { utf16W, XmlEncoding_UTF16, 1200 },
162 { utf8W, XmlEncoding_UTF8, CP_UTF8 },
163 };
164
165 const WCHAR *get_encoding_name(xml_encoding encoding)
166 {
167 return xml_encoding_map[encoding].name;
168 }
169
170 xml_encoding get_encoding_from_codepage(UINT codepage)
171 {
172 int i;
173 for (i = 0; i < ARRAY_SIZE(xml_encoding_map); i++)
174 {
175 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
176 }
177 return XmlEncoding_Unknown;
178 }
179
180 typedef struct
181 {
182 char *data;
183 UINT cur;
184 unsigned int allocated;
185 unsigned int written;
186 BOOL prev_cr;
187 } encoded_buffer;
188
189 typedef struct input_buffer input_buffer;
190
191 typedef struct
192 {
193 IXmlReaderInput IXmlReaderInput_iface;
194 LONG ref;
195 /* reference passed on IXmlReaderInput creation, is kept when input is created */
196 IUnknown *input;
197 IMalloc *imalloc;
198 xml_encoding encoding;
199 BOOL hint;
200 WCHAR *baseuri;
201 /* stream reference set after SetInput() call from reader,
202 stored as sequential stream, cause currently
203 optimizations possible with IStream aren't implemented */
204 ISequentialStream *stream;
205 input_buffer *buffer;
206 unsigned int pending : 1;
207 } xmlreaderinput;
208
209 static const struct IUnknownVtbl xmlreaderinputvtbl;
210
211 /* Structure to hold parsed string of specific length.
212
213 Reader stores node value as 'start' pointer, on request
214 a null-terminated version of it is allocated.
215
216 To init a strval variable use reader_init_strval(),
217 to set strval as a reader value use reader_set_strval().
218 */
219 typedef struct
220 {
221 WCHAR *str; /* allocated null-terminated string */
222 UINT len; /* length in WCHARs, altered after ReadValueChunk */
223 UINT start; /* input position where value starts */
224 } strval;
225
226 static WCHAR emptyW[] = {0};
227 static WCHAR xmlW[] = {'x','m','l',0};
228 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
229 static const strval strval_empty = { emptyW };
230 static const strval strval_xml = { xmlW, 3 };
231 static const strval strval_xmlns = { xmlnsW, 5 };
232
233 struct reader_position
234 {
235 UINT line_number;
236 UINT line_position;
237 };
238
/* Bit flags stored in struct attribute 'flags'. */
enum attribute_flags
{
    ATTRIBUTE_NS_DEFINITION         = 0x1,
    ATTRIBUTE_DEFAULT_NS_DEFINITION = 0x2,
};
244
245 struct attribute
246 {
247 struct list entry;
248 strval prefix;
249 strval localname;
250 strval qname;
251 strval value;
252 struct reader_position position;
253 unsigned int flags;
254 };
255
256 struct element
257 {
258 struct list entry;
259 strval prefix;
260 strval localname;
261 strval qname;
262 struct reader_position position;
263 };
264
265 struct ns
266 {
267 struct list entry;
268 strval prefix;
269 strval uri;
270 struct element *element;
271 };
272
273 typedef struct
274 {
275 IXmlReader IXmlReader_iface;
276 LONG ref;
277 xmlreaderinput *input;
278 IMalloc *imalloc;
279 XmlReadState state;
280 HRESULT error; /* error set on XmlReadState_Error */
281 XmlReaderInternalState instate;
282 XmlReaderResumeState resumestate;
283 XmlNodeType nodetype;
284 DtdProcessing dtdmode;
285 IXmlResolver *resolver;
286 IUnknown *mlang;
287 struct reader_position position;
288 struct list attrs; /* attributes list for current node */
289 struct attribute *attr; /* current attribute */
290 UINT attr_count;
291 struct list nsdef;
292 struct list ns;
293 struct list elements;
294 int chunk_read_off;
295 strval strvalues[StringValue_Last];
296 UINT depth;
297 UINT max_depth;
298 BOOL is_empty_element;
299 struct element empty_element; /* used for empty elements without end tag <a />,
300 and to keep <?xml reader position */
301 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
302 } xmlreader;
303
304 struct input_buffer
305 {
306 encoded_buffer utf16;
307 encoded_buffer encoded;
308 UINT code_page;
309 xmlreaderinput *input;
310 };
311
312 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
313 {
314 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
315 }
316
317 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
318 {
319 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
320 }
321
322 /* reader memory allocation functions */
323 static inline void *reader_alloc(xmlreader *reader, size_t len)
324 {
325 return m_alloc(reader->imalloc, len);
326 }
327
328 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
329 {
330 void *ret = reader_alloc(reader, len);
331 if (ret)
332 memset(ret, 0, len);
333 return ret;
334 }
335
336 static inline void reader_free(xmlreader *reader, void *mem)
337 {
338 m_free(reader->imalloc, mem);
339 }
340
341 /* Just return pointer from offset, no attempt to read more. */
342 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
343 {
344 encoded_buffer *buffer = &reader->input->buffer->utf16;
345 return (WCHAR*)buffer->data + offset;
346 }
347
348 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
349 {
350 return v->str ? v->str : reader_get_ptr2(reader, v->start);
351 }
352
353 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
354 {
355 *dest = *src;
356
357 if (src->str != strval_empty.str)
358 {
359 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
360 if (!dest->str) return E_OUTOFMEMORY;
361 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
362 dest->str[dest->len] = 0;
363 dest->start = 0;
364 }
365
366 return S_OK;
367 }
368
369 /* reader input memory allocation functions */
370 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
371 {
372 return m_alloc(input->imalloc, len);
373 }
374
375 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
376 {
377 return m_realloc(input->imalloc, mem, len);
378 }
379
380 static inline void readerinput_free(xmlreaderinput *input, void *mem)
381 {
382 m_free(input->imalloc, mem);
383 }
384
385 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
386 {
387 LPWSTR ret = NULL;
388
389 if(str) {
390 DWORD size;
391
392 size = (lstrlenW(str)+1)*sizeof(WCHAR);
393 ret = readerinput_alloc(input, size);
394 if (ret) memcpy(ret, str, size);
395 }
396
397 return ret;
398 }
399
400 /* This one frees stored string value if needed */
401 static void reader_free_strvalued(xmlreader *reader, strval *v)
402 {
403 if (v->str != strval_empty.str)
404 {
405 reader_free(reader, v->str);
406 *v = strval_empty;
407 }
408 }
409
410 static void reader_clear_attrs(xmlreader *reader)
411 {
412 struct attribute *attr, *attr2;
413 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
414 {
415 reader_free_strvalued(reader, &attr->localname);
416 reader_free_strvalued(reader, &attr->value);
417 reader_free(reader, attr);
418 }
419 list_init(&reader->attrs);
420 reader->attr_count = 0;
421 reader->attr = NULL;
422 }
423
424 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
425 while we are on a node with attributes */
426 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *qname,
427 strval *value, const struct reader_position *position, unsigned int flags)
428 {
429 struct attribute *attr;
430 HRESULT hr;
431
432 attr = reader_alloc(reader, sizeof(*attr));
433 if (!attr) return E_OUTOFMEMORY;
434
435 hr = reader_strvaldup(reader, localname, &attr->localname);
436 if (hr == S_OK)
437 {
438 hr = reader_strvaldup(reader, value, &attr->value);
439 if (hr != S_OK)
440 reader_free_strvalued(reader, &attr->value);
441 }
442 if (hr != S_OK)
443 {
444 reader_free(reader, attr);
445 return hr;
446 }
447
448 if (prefix)
449 attr->prefix = *prefix;
450 else
451 memset(&attr->prefix, 0, sizeof(attr->prefix));
452 attr->qname = qname ? *qname : *localname;
453 attr->position = *position;
454 attr->flags = flags;
455 list_add_tail(&reader->attrs, &attr->entry);
456 reader->attr_count++;
457
458 return S_OK;
459 }
460
461 /* Returns current element, doesn't check if reader is actually positioned on it. */
462 static struct element *reader_get_element(xmlreader *reader)
463 {
464 if (reader->is_empty_element)
465 return &reader->empty_element;
466
467 return LIST_ENTRY(list_head(&reader->elements), struct element, entry);
468 }
469
470 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
471 {
472 v->start = start;
473 v->len = len;
474 v->str = NULL;
475 }
476
477 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
478 {
479 return debugstr_wn(reader_get_strptr(reader, v), v->len);
480 }
481
482 /* used to initialize from constant string */
483 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
484 {
485 v->start = 0;
486 v->len = len;
487 v->str = str;
488 }
489
490 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
491 {
492 reader_free_strvalued(reader, &reader->strvalues[type]);
493 }
494
495 static void reader_free_strvalues(xmlreader *reader)
496 {
497 int type;
498 for (type = 0; type < StringValue_Last; type++)
499 reader_free_strvalue(reader, type);
500 }
501
502 /* This helper should only be used to test if strings are the same,
503 it doesn't try to sort. */
504 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
505 {
506 if (str1->len != str2->len) return 0;
507 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
508 }
509
510 static void reader_clear_elements(xmlreader *reader)
511 {
512 struct element *elem, *elem2;
513 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
514 {
515 reader_free_strvalued(reader, &elem->prefix);
516 reader_free_strvalued(reader, &elem->localname);
517 reader_free_strvalued(reader, &elem->qname);
518 reader_free(reader, elem);
519 }
520 list_init(&reader->elements);
521 reader_free_strvalued(reader, &reader->empty_element.localname);
522 reader_free_strvalued(reader, &reader->empty_element.qname);
523 reader->is_empty_element = FALSE;
524 }
525
526 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
527 {
528 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
529 struct ns *ns;
530
531 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
532 if (strval_eq(reader, prefix, &ns->prefix))
533 return ns;
534 }
535
536 return NULL;
537 }
538
539 static HRESULT reader_inc_depth(xmlreader *reader)
540 {
541 return (++reader->depth >= reader->max_depth && reader->max_depth) ? SC_E_MAXELEMENTDEPTH : S_OK;
542 }
543
544 static void reader_dec_depth(xmlreader *reader)
545 {
546 if (reader->depth)
547 reader->depth--;
548 }
549
550 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
551 {
552 struct ns *ns;
553 HRESULT hr;
554
555 ns = reader_alloc(reader, sizeof(*ns));
556 if (!ns) return E_OUTOFMEMORY;
557
558 if (def)
559 memset(&ns->prefix, 0, sizeof(ns->prefix));
560 else {
561 hr = reader_strvaldup(reader, prefix, &ns->prefix);
562 if (FAILED(hr)) {
563 reader_free(reader, ns);
564 return hr;
565 }
566 }
567
568 hr = reader_strvaldup(reader, uri, &ns->uri);
569 if (FAILED(hr)) {
570 reader_free_strvalued(reader, &ns->prefix);
571 reader_free(reader, ns);
572 return hr;
573 }
574
575 ns->element = NULL;
576 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
577 return hr;
578 }
579
580 static void reader_free_element(xmlreader *reader, struct element *element)
581 {
582 reader_free_strvalued(reader, &element->prefix);
583 reader_free_strvalued(reader, &element->localname);
584 reader_free_strvalued(reader, &element->qname);
585 reader_free(reader, element);
586 }
587
588 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
589 {
590 struct ns *ns;
591
592 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
593 if (ns->element)
594 break;
595 ns->element = element;
596 }
597
598 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
599 if (ns->element)
600 break;
601 ns->element = element;
602 }
603 }
604
605 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
606 strval *qname, const struct reader_position *position)
607 {
608 struct element *element;
609 HRESULT hr;
610
611 element = reader_alloc_zero(reader, sizeof(*element));
612 if (!element)
613 return E_OUTOFMEMORY;
614
615 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) == S_OK &&
616 (hr = reader_strvaldup(reader, localname, &element->localname)) == S_OK &&
617 (hr = reader_strvaldup(reader, qname, &element->qname)) == S_OK)
618 {
619 list_add_head(&reader->elements, &element->entry);
620 reader_mark_ns_nodes(reader, element);
621 reader->is_empty_element = FALSE;
622 element->position = *position;
623 }
624 else
625 reader_free_element(reader, element);
626
627 return hr;
628 }
629
630 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
631 {
632 struct ns *ns, *ns2;
633
634 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
635 if (ns->element != element)
636 break;
637
638 list_remove(&ns->entry);
639 reader_free_strvalued(reader, &ns->prefix);
640 reader_free_strvalued(reader, &ns->uri);
641 reader_free(reader, ns);
642 }
643
644 if (!list_empty(&reader->nsdef)) {
645 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
646 if (ns->element == element) {
647 list_remove(&ns->entry);
648 reader_free_strvalued(reader, &ns->prefix);
649 reader_free_strvalued(reader, &ns->uri);
650 reader_free(reader, ns);
651 }
652 }
653 }
654
655 static void reader_pop_element(xmlreader *reader)
656 {
657 struct element *element;
658
659 if (list_empty(&reader->elements))
660 return;
661
662 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
663 list_remove(&element->entry);
664
665 reader_pop_ns_nodes(reader, element);
666 reader_free_element(reader, element);
667
668 /* It was a root element, the rest is expected as Misc */
669 if (list_empty(&reader->elements))
670 reader->instate = XmlReadInState_MiscEnd;
671 }
672
673 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
674 means node value is to be determined. */
675 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
676 {
677 strval *v = &reader->strvalues[type];
678
679 reader_free_strvalue(reader, type);
680 if (!value)
681 {
682 v->str = NULL;
683 v->start = 0;
684 v->len = 0;
685 return;
686 }
687
688 if (value->str == strval_empty.str)
689 *v = *value;
690 else
691 {
692 if (type == StringValue_Value)
693 {
694 /* defer allocation for value string */
695 v->str = NULL;
696 v->start = value->start;
697 v->len = value->len;
698 }
699 else
700 {
701 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
702 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
703 v->str[value->len] = 0;
704 v->len = value->len;
705 }
706 }
707 }
708
709 static inline int is_reader_pending(xmlreader *reader)
710 {
711 return reader->input->pending;
712 }
713
714 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
715 {
716 const int initial_len = 0x2000;
717 buffer->data = readerinput_alloc(input, initial_len);
718 if (!buffer->data) return E_OUTOFMEMORY;
719
720 memset(buffer->data, 0, 4);
721 buffer->cur = 0;
722 buffer->allocated = initial_len;
723 buffer->written = 0;
724 buffer->prev_cr = FALSE;
725
726 return S_OK;
727 }
728
729 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
730 {
731 readerinput_free(input, buffer->data);
732 }
733
734 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
735 {
736 if (encoding == XmlEncoding_Unknown)
737 {
738 FIXME("unsupported encoding %d\n", encoding);
739 return E_NOTIMPL;
740 }
741
742 *cp = xml_encoding_map[encoding].cp;
743
744 return S_OK;
745 }
746
747 xml_encoding parse_encoding_name(const WCHAR *name, int len)
748 {
749 int min, max, n, c;
750
751 if (!name) return XmlEncoding_Unknown;
752
753 min = 0;
754 max = ARRAY_SIZE(xml_encoding_map) - 1;
755
756 while (min <= max)
757 {
758 n = (min+max)/2;
759
760 if (len != -1)
761 c = _wcsnicmp(xml_encoding_map[n].name, name, len);
762 else
763 c = wcsicmp(xml_encoding_map[n].name, name);
764 if (!c)
765 return xml_encoding_map[n].enc;
766
767 if (c > 0)
768 max = n-1;
769 else
770 min = n+1;
771 }
772
773 return XmlEncoding_Unknown;
774 }
775
776 static HRESULT alloc_input_buffer(xmlreaderinput *input)
777 {
778 input_buffer *buffer;
779 HRESULT hr;
780
781 input->buffer = NULL;
782
783 buffer = readerinput_alloc(input, sizeof(*buffer));
784 if (!buffer) return E_OUTOFMEMORY;
785
786 buffer->input = input;
787 buffer->code_page = ~0; /* code page is unknown at this point */
788 hr = init_encoded_buffer(input, &buffer->utf16);
789 if (hr != S_OK) {
790 readerinput_free(input, buffer);
791 return hr;
792 }
793
794 hr = init_encoded_buffer(input, &buffer->encoded);
795 if (hr != S_OK) {
796 free_encoded_buffer(input, &buffer->utf16);
797 readerinput_free(input, buffer);
798 return hr;
799 }
800
801 input->buffer = buffer;
802 return S_OK;
803 }
804
805 static void free_input_buffer(input_buffer *buffer)
806 {
807 free_encoded_buffer(buffer->input, &buffer->encoded);
808 free_encoded_buffer(buffer->input, &buffer->utf16);
809 readerinput_free(buffer->input, buffer);
810 }
811
812 static void readerinput_release_stream(xmlreaderinput *readerinput)
813 {
814 if (readerinput->stream) {
815 ISequentialStream_Release(readerinput->stream);
816 readerinput->stream = NULL;
817 }
818 }
819
820 /* Queries already stored interface for IStream/ISequentialStream.
821 Interface supplied on creation will be overwritten */
822 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
823 {
824 HRESULT hr;
825
826 readerinput_release_stream(readerinput);
827 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
828 if (hr != S_OK)
829 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
830
831 return hr;
832 }
833
834 /* reads a chunk to raw buffer */
835 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
836 {
837 encoded_buffer *buffer = &readerinput->buffer->encoded;
838 /* to make sure aligned length won't exceed allocated length */
839 ULONG len = buffer->allocated - buffer->written - 4;
840 ULONG read;
841 HRESULT hr;
842
843 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
844 variable width encodings like UTF-8 */
845 len = (len + 3) & ~3;
846 /* try to use allocated space or grow */
847 if (buffer->allocated - buffer->written < len)
848 {
849 buffer->allocated *= 2;
850 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
851 len = buffer->allocated - buffer->written;
852 }
853
854 read = 0;
855 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
856 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
857 readerinput->pending = hr == E_PENDING;
858 if (FAILED(hr)) return hr;
859 buffer->written += read;
860
861 return hr;
862 }
863
864 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
865 static void readerinput_grow(xmlreaderinput *readerinput, int length)
866 {
867 encoded_buffer *buffer = &readerinput->buffer->utf16;
868
869 length *= sizeof(WCHAR);
870 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
871 if (buffer->allocated < buffer->written + length + 4)
872 {
873 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
874 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
875 buffer->allocated = grown_size;
876 }
877 }
878
879 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
880 {
881 static const char startA[] = {'<','?'};
882 static const char commentA[] = {'<','!'};
883 encoded_buffer *buffer = &readerinput->buffer->encoded;
884 unsigned char *ptr = (unsigned char*)buffer->data;
885
886 return !memcmp(buffer->data, startA, sizeof(startA)) ||
887 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
888 /* test start byte */
889 (ptr[0] == '<' &&
890 (
891 (ptr[1] && (ptr[1] <= 0x7f)) ||
892 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
893 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
894 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
895 );
896 }
897
898 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
899 {
900 encoded_buffer *buffer = &readerinput->buffer->encoded;
901 static const char utf8bom[] = {0xef,0xbb,0xbf};
902 static const char utf16lebom[] = {0xff,0xfe};
903 WCHAR *ptrW;
904
905 *enc = XmlEncoding_Unknown;
906
907 if (buffer->written <= 3)
908 {
909 HRESULT hr = readerinput_growraw(readerinput);
910 if (FAILED(hr)) return hr;
911 if (buffer->written < 3) return MX_E_INPUTEND;
912 }
913
914 ptrW = (WCHAR *)buffer->data;
915 /* try start symbols if we have enough data to do that, input buffer should contain
916 first chunk already */
917 if (readerinput_is_utf8(readerinput))
918 *enc = XmlEncoding_UTF8;
919 else if (*ptrW == '<')
920 {
921 ptrW++;
922 if (*ptrW == '?' || *ptrW == '!' || is_namestartchar(*ptrW))
923 *enc = XmlEncoding_UTF16;
924 }
925 /* try with BOM now */
926 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
927 {
928 buffer->cur += sizeof(utf8bom);
929 *enc = XmlEncoding_UTF8;
930 }
931 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
932 {
933 buffer->cur += sizeof(utf16lebom);
934 *enc = XmlEncoding_UTF16;
935 }
936
937 return S_OK;
938 }
939
940 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
941 {
942 encoded_buffer *buffer = &readerinput->buffer->encoded;
943 int len = buffer->written;
944
945 /* complete single byte char */
946 if (!(buffer->data[len-1] & 0x80)) return len;
947
948 /* find start byte of multibyte char */
949 while (--len && !(buffer->data[len] & 0xc0))
950 ;
951
952 return len;
953 }
954
955 /* Returns byte length of complete char sequence for buffer code page,
956 it's relative to current buffer position which is currently used for BOM handling
957 only. */
958 static int readerinput_get_convlen(xmlreaderinput *readerinput)
959 {
960 encoded_buffer *buffer = &readerinput->buffer->encoded;
961 int len;
962
963 if (readerinput->buffer->code_page == CP_UTF8)
964 len = readerinput_get_utf8_convlen(readerinput);
965 else
966 len = buffer->written;
967
968 TRACE("%d\n", len - buffer->cur);
969 return len - buffer->cur;
970 }
971
972 /* It's possible that raw buffer has some leftovers from last conversion - some char
973 sequence that doesn't represent a full code point. Length argument should be calculated with
974 readerinput_get_convlen(), if it's -1 it will be calculated here. */
975 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
976 {
977 encoded_buffer *buffer = &readerinput->buffer->encoded;
978
979 if (len == -1)
980 len = readerinput_get_convlen(readerinput);
981
982 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
983 /* everything below cur is lost too */
984 buffer->written -= len + buffer->cur;
985 /* after this point we don't need cur offset really,
986 it's used only to mark where actual data begins when first chunk is read */
987 buffer->cur = 0;
988 }
989
990 static void fixup_buffer_cr(encoded_buffer *buffer, int off)
991 {
992 BOOL prev_cr = buffer->prev_cr;
993 const WCHAR *src;
994 WCHAR *dest;
995
996 src = dest = (WCHAR*)buffer->data + off;
997 while ((const char*)src < buffer->data + buffer->written)
998 {
999 if (*src == '\r')
1000 {
1001 *dest++ = '\n';
1002 src++;
1003 prev_cr = TRUE;
1004 continue;
1005 }
1006 if(prev_cr && *src == '\n')
1007 src++;
1008 else
1009 *dest++ = *src++;
1010 prev_cr = FALSE;
1011 }
1012
1013 buffer->written = (char*)dest - buffer->data;
1014 buffer->prev_cr = prev_cr;
1015 *dest = 0;
1016 }
1017
1018 /* note that raw buffer content is kept */
1019 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
1020 {
1021 encoded_buffer *src = &readerinput->buffer->encoded;
1022 encoded_buffer *dest = &readerinput->buffer->utf16;
1023 int len, dest_len;
1024 UINT cp = ~0u;
1025 HRESULT hr;
1026 WCHAR *ptr;
1027
1028 hr = get_code_page(enc, &cp);
1029 if (FAILED(hr)) return;
1030
1031 readerinput->buffer->code_page = cp;
1032 len = readerinput_get_convlen(readerinput);
1033
1034 TRACE("switching to cp %d\n", cp);
1035
1036 /* just copy in this case */
1037 if (enc == XmlEncoding_UTF16)
1038 {
1039 readerinput_grow(readerinput, len);
1040 memcpy(dest->data, src->data + src->cur, len);
1041 dest->written += len*sizeof(WCHAR);
1042 }
1043 else
1044 {
1045 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1046 readerinput_grow(readerinput, dest_len);
1047 ptr = (WCHAR*)dest->data;
1048 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1049 ptr[dest_len] = 0;
1050 dest->written += dest_len*sizeof(WCHAR);
1051 }
1052
1053 fixup_buffer_cr(dest, 0);
1054 }
1055
1056 /* shrinks parsed data a buffer begins with */
1057 static void reader_shrink(xmlreader *reader)
1058 {
1059 encoded_buffer *buffer = &reader->input->buffer->utf16;
1060
1061 /* avoid to move too often using threshold shrink length */
1062 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
1063 {
1064 buffer->written -= buffer->cur*sizeof(WCHAR);
1065 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
1066 buffer->cur = 0;
1067 *(WCHAR*)&buffer->data[buffer->written] = 0;
1068 }
1069 }
1070
1071 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1072 It won't attempt to shrink but will grow destination buffer if needed */
1073 static HRESULT reader_more(xmlreader *reader)
1074 {
1075 xmlreaderinput *readerinput = reader->input;
1076 encoded_buffer *src = &readerinput->buffer->encoded;
1077 encoded_buffer *dest = &readerinput->buffer->utf16;
1078 UINT cp = readerinput->buffer->code_page;
1079 int len, dest_len, prev_len;
1080 HRESULT hr;
1081 WCHAR *ptr;
1082
1083 /* get some raw data from stream first */
1084 hr = readerinput_growraw(readerinput);
1085 len = readerinput_get_convlen(readerinput);
1086 prev_len = dest->written / sizeof(WCHAR);
1087
1088 /* just copy for UTF-16 case */
1089 if (cp == 1200)
1090 {
1091 readerinput_grow(readerinput, len);
1092 memcpy(dest->data + dest->written, src->data + src->cur, len);
1093 dest->written += len*sizeof(WCHAR);
1094 }
1095 else
1096 {
1097 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
1098 readerinput_grow(readerinput, dest_len);
1099 ptr = (WCHAR*)(dest->data + dest->written);
1100 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
1101 ptr[dest_len] = 0;
1102 dest->written += dest_len*sizeof(WCHAR);
1103 /* get rid of processed data */
1104 readerinput_shrinkraw(readerinput, len);
1105 }
1106
1107 fixup_buffer_cr(dest, prev_len);
1108 return hr;
1109 }
1110
1111 static inline UINT reader_get_cur(xmlreader *reader)
1112 {
1113 return reader->input->buffer->utf16.cur;
1114 }
1115
1116 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1117 {
1118 encoded_buffer *buffer = &reader->input->buffer->utf16;
1119 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1120 if (!*ptr) reader_more(reader);
1121 return (WCHAR*)buffer->data + buffer->cur;
1122 }
1123
1124 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1125 {
1126 int i=0;
1127 const WCHAR *ptr = reader_get_ptr(reader);
1128 while (str[i])
1129 {
1130 if (!ptr[i])
1131 {
1132 reader_more(reader);
1133 ptr = reader_get_ptr(reader);
1134 }
1135 if (str[i] != ptr[i])
1136 return ptr[i] - str[i];
1137 i++;
1138 }
1139 return 0;
1140 }
1141
1142 static void reader_update_position(xmlreader *reader, WCHAR ch)
1143 {
1144 if (ch == '\r')
1145 reader->position.line_position = 1;
1146 else if (ch == '\n')
1147 {
1148 reader->position.line_number++;
1149 reader->position.line_position = 1;
1150 }
1151 else
1152 reader->position.line_position++;
1153 }
1154
1155 /* moves cursor n WCHARs forward */
1156 static void reader_skipn(xmlreader *reader, int n)
1157 {
1158 encoded_buffer *buffer = &reader->input->buffer->utf16;
1159 const WCHAR *ptr;
1160
1161 while (*(ptr = reader_get_ptr(reader)) && n--)
1162 {
1163 reader_update_position(reader, *ptr);
1164 buffer->cur++;
1165 }
1166 }
1167
/* Tests for one of whitespace characters: space, tab, carriage return or line feed */
static inline BOOL is_wchar_space(WCHAR ch)
{
    const BOOL blank = (ch == ' ' || ch == '\t');
    const BOOL newline = (ch == '\r' || ch == '\n');

    return blank || newline;
}
1172
1173 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1174 static int reader_skipspaces(xmlreader *reader)
1175 {
1176 const WCHAR *ptr = reader_get_ptr(reader);
1177 UINT start = reader_get_cur(reader);
1178
1179 while (is_wchar_space(*ptr))
1180 {
1181 reader_skipn(reader, 1);
1182 ptr = reader_get_ptr(reader);
1183 }
1184
1185 return reader_get_cur(reader) - start;
1186 }
1187
1188 /* [26] VersionNum ::= '1.' [0-9]+ */
1189 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1190 {
1191 static const WCHAR onedotW[] = {'1','.',0};
1192 WCHAR *ptr, *ptr2;
1193 UINT start;
1194
1195 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1196
1197 start = reader_get_cur(reader);
1198 /* skip "1." */
1199 reader_skipn(reader, 2);
1200
1201 ptr2 = ptr = reader_get_ptr(reader);
1202 while (*ptr >= '0' && *ptr <= '9')
1203 {
1204 reader_skipn(reader, 1);
1205 ptr = reader_get_ptr(reader);
1206 }
1207
1208 if (ptr2 == ptr) return WC_E_DIGIT;
1209 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1210 TRACE("version=%s\n", debug_strval(reader, val));
1211 return S_OK;
1212 }
1213
1214 /* [25] Eq ::= S? '=' S? */
1215 static HRESULT reader_parse_eq(xmlreader *reader)
1216 {
1217 static const WCHAR eqW[] = {'=',0};
1218 reader_skipspaces(reader);
1219 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1220 /* skip '=' */
1221 reader_skipn(reader, 1);
1222 reader_skipspaces(reader);
1223 return S_OK;
1224 }
1225
1226 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1227 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1228 {
1229 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1230 struct reader_position position;
1231 strval val, name;
1232 HRESULT hr;
1233
1234 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1235
1236 position = reader->position;
1237 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1238 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1239 /* skip 'version' */
1240 reader_skipn(reader, 7);
1241
1242 hr = reader_parse_eq(reader);
1243 if (FAILED(hr)) return hr;
1244
1245 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1246 return WC_E_QUOTE;
1247 /* skip "'"|'"' */
1248 reader_skipn(reader, 1);
1249
1250 hr = reader_parse_versionnum(reader, &val);
1251 if (FAILED(hr)) return hr;
1252
1253 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1254 return WC_E_QUOTE;
1255
1256 /* skip "'"|'"' */
1257 reader_skipn(reader, 1);
1258
1259 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1260 }
1261
/* ([A-Za-z0-9._] | '-'), characters allowed in encoding name after the first one */
static inline BOOL is_wchar_encname(WCHAR ch)
{
    const BOOL letter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
    const BOOL digit = (ch >= '0' && ch <= '9');
    const BOOL punct = (ch == '.' || ch == '_' || ch == '-');

    return letter || digit || punct;
}
1271
1272 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1273 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1274 {
1275 WCHAR *start = reader_get_ptr(reader), *ptr;
1276 xml_encoding enc;
1277 int len;
1278
1279 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1280 return WC_E_ENCNAME;
1281
1282 val->start = reader_get_cur(reader);
1283
1284 ptr = start;
1285 while (is_wchar_encname(*++ptr))
1286 ;
1287
1288 len = ptr - start;
1289 enc = parse_encoding_name(start, len);
1290 TRACE("encoding name %s\n", debugstr_wn(start, len));
1291 val->str = start;
1292 val->len = len;
1293
1294 if (enc == XmlEncoding_Unknown)
1295 return WC_E_ENCNAME;
1296
1297 /* skip encoding name */
1298 reader_skipn(reader, len);
1299 return S_OK;
1300 }
1301
1302 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1303 static HRESULT reader_parse_encdecl(xmlreader *reader)
1304 {
1305 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1306 struct reader_position position;
1307 strval name, val;
1308 HRESULT hr;
1309
1310 if (!reader_skipspaces(reader)) return S_FALSE;
1311
1312 position = reader->position;
1313 if (reader_cmp(reader, encodingW)) return S_FALSE;
1314 name.str = reader_get_ptr(reader);
1315 name.start = reader_get_cur(reader);
1316 name.len = 8;
1317 /* skip 'encoding' */
1318 reader_skipn(reader, 8);
1319
1320 hr = reader_parse_eq(reader);
1321 if (FAILED(hr)) return hr;
1322
1323 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1324 return WC_E_QUOTE;
1325 /* skip "'"|'"' */
1326 reader_skipn(reader, 1);
1327
1328 hr = reader_parse_encname(reader, &val);
1329 if (FAILED(hr)) return hr;
1330
1331 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1332 return WC_E_QUOTE;
1333
1334 /* skip "'"|'"' */
1335 reader_skipn(reader, 1);
1336
1337 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1338 }
1339
1340 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1341 static HRESULT reader_parse_sddecl(xmlreader *reader)
1342 {
1343 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1344 static const WCHAR yesW[] = {'y','e','s',0};
1345 static const WCHAR noW[] = {'n','o',0};
1346 struct reader_position position;
1347 strval name, val;
1348 UINT start;
1349 HRESULT hr;
1350
1351 if (!reader_skipspaces(reader)) return S_FALSE;
1352
1353 position = reader->position;
1354 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1355 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1356 /* skip 'standalone' */
1357 reader_skipn(reader, 10);
1358
1359 hr = reader_parse_eq(reader);
1360 if (FAILED(hr)) return hr;
1361
1362 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1363 return WC_E_QUOTE;
1364 /* skip "'"|'"' */
1365 reader_skipn(reader, 1);
1366
1367 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1368 return WC_E_XMLDECL;
1369
1370 start = reader_get_cur(reader);
1371 /* skip 'yes'|'no' */
1372 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1373 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1374 TRACE("standalone=%s\n", debug_strval(reader, &val));
1375
1376 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1377 return WC_E_QUOTE;
1378 /* skip "'"|'"' */
1379 reader_skipn(reader, 1);
1380
1381 return reader_add_attr(reader, NULL, &name, NULL, &val, &position, 0);
1382 }
1383
1384 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1385 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1386 {
1387 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1388 static const WCHAR declcloseW[] = {'?','>',0};
1389 struct reader_position position;
1390 HRESULT hr;
1391
1392 /* check if we have "<?xml " */
1393 if (reader_cmp(reader, xmldeclW))
1394 return S_FALSE;
1395
1396 reader_skipn(reader, 2);
1397 position = reader->position;
1398 reader_skipn(reader, 3);
1399 hr = reader_parse_versioninfo(reader);
1400 if (FAILED(hr))
1401 return hr;
1402
1403 hr = reader_parse_encdecl(reader);
1404 if (FAILED(hr))
1405 return hr;
1406
1407 hr = reader_parse_sddecl(reader);
1408 if (FAILED(hr))
1409 return hr;
1410
1411 reader_skipspaces(reader);
1412 if (reader_cmp(reader, declcloseW))
1413 return WC_E_XMLDECL;
1414
1415 /* skip '?>' */
1416 reader_skipn(reader, 2);
1417
1418 reader->nodetype = XmlNodeType_XmlDeclaration;
1419 reader->empty_element.position = position;
1420 reader_set_strvalue(reader, StringValue_LocalName, &strval_xml);
1421 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_xml);
1422
1423 return S_OK;
1424 }
1425
1426 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1427 static HRESULT reader_parse_comment(xmlreader *reader)
1428 {
1429 WCHAR *ptr;
1430 UINT start;
1431
1432 if (reader->resumestate == XmlReadResumeState_Comment)
1433 {
1434 start = reader->resume[XmlReadResume_Body];
1435 ptr = reader_get_ptr(reader);
1436 }
1437 else
1438 {
1439 /* skip '<!--' */
1440 reader_skipn(reader, 4);
1441 reader_shrink(reader);
1442 ptr = reader_get_ptr(reader);
1443 start = reader_get_cur(reader);
1444 reader->nodetype = XmlNodeType_Comment;
1445 reader->resume[XmlReadResume_Body] = start;
1446 reader->resumestate = XmlReadResumeState_Comment;
1447 reader_set_strvalue(reader, StringValue_Value, NULL);
1448 }
1449
1450 /* will exit when there's no more data, it won't attempt to
1451 read more from stream */
1452 while (*ptr)
1453 {
1454 if (ptr[0] == '-')
1455 {
1456 if (ptr[1] == '-')
1457 {
1458 if (ptr[2] == '>')
1459 {
1460 strval value;
1461
1462 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1463 TRACE("%s\n", debug_strval(reader, &value));
1464
1465 /* skip rest of markup '->' */
1466 reader_skipn(reader, 3);
1467
1468 reader_set_strvalue(reader, StringValue_Value, &value);
1469 reader->resume[XmlReadResume_Body] = 0;
1470 reader->resumestate = XmlReadResumeState_Initial;
1471 return S_OK;
1472 }
1473 else
1474 return WC_E_COMMENT;
1475 }
1476 }
1477
1478 reader_skipn(reader, 1);
1479 ptr++;
1480 }
1481
1482 return S_OK;
1483 }
1484
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]

   Works on a single UTF-16 code unit, both high and low surrogates are accepted. */
static inline BOOL is_char(WCHAR ch)
{
    /* only three characters are allowed below space */
    if (ch < 0x20)
        return (ch == '\t') || (ch == '\r') || (ch == '\n');

    /* everything else is fine, surrogates included, up to 0xfffd */
    return ch <= 0xfffd;
}
1494
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

   Returns non-zero if character is allowed in a public identifier literal. Note that
   both quote characters are not treated the same: apostrophe is a valid PubidChar,
   double quote is not. */
BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           /* contiguous range, starts at apostrophe: '()*+,-./ then digits then :; */
           (ch >= '\'' && ch <= ';') ||
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
1508
/* Tests if UTF-16 code unit is allowed as a first character of a Name.
   Surrogates are accepted, both high and low ones. */
BOOL is_namestartchar(WCHAR ch)
{
    static const struct
    {
        WCHAR first;
        WCHAR last;
    } ranges[] =
    {
        { ':', ':' },
        { 'A', 'Z' },
        { '_', '_' },
        { 'a', 'z' },
        { 0xc0, 0xd6 },
        { 0xd8, 0xf6 },
        { 0xf8, 0x2ff },
        { 0x370, 0x37d },
        { 0x37f, 0x1fff },
        { 0x200c, 0x200d },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    unsigned int i;

    for (i = 0; i < sizeof(ranges)/sizeof(ranges[0]); i++)
    {
        if (ch >= ranges[i].first && ch <= ranges[i].last)
            break;
    }

    /* loop was left early only if matching range was found */
    return i < sizeof(ranges)/sizeof(ranges[0]);
}
1527
/* [4 NS] NCName ::= Name - (Char* ':' Char*)

   Tests if UTF-16 code unit is allowed in NCName, that is any name character
   except for a colon. Surrogates are accepted, both high and low ones. */
BOOL is_ncnamechar(WCHAR ch)
{
    static const struct
    {
        WCHAR first;
        WCHAR last;
    } ranges[] =
    {
        { 'A', 'Z' },
        { '_', '_' },
        { 'a', 'z' },
        { '-', '-' },
        { '.', '.' },
        { '0', '9' },
        { 0xb7, 0xb7 },
        { 0xc0, 0xd6 },
        { 0xd8, 0xf6 },
        { 0xf8, 0x2ff },
        { 0x300, 0x36f },
        { 0x370, 0x37d },
        { 0x37f, 0x1fff },
        { 0x200c, 0x200d },
        { 0x203f, 0x2040 },
        { 0x2070, 0x218f },
        { 0x2c00, 0x2fef },
        { 0x3001, 0xd7ff },
        { 0xd800, 0xdbff }, /* high surrogate */
        { 0xdc00, 0xdfff }, /* low surrogate */
        { 0xf900, 0xfdcf },
        { 0xfdf0, 0xfffd },
    };
    unsigned int i;

    for (i = 0; i < sizeof(ranges)/sizeof(ranges[0]); i++)
    {
        if (ch >= ranges[i].first && ch <= ranges[i].last)
            break;
    }

    /* loop was left early only if matching range was found */
    return i < sizeof(ranges)/sizeof(ranges[0]);
}
1552
1553 BOOL is_namechar(WCHAR ch)
1554 {
1555 return (ch == ':') || is_ncnamechar(ch);
1556 }
1557
1558 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1559 {
1560 /* When we're on attribute always return attribute type, container node type is kept.
1561 Note that container is not necessarily an element, and attribute doesn't mean it's
1562 an attribute in XML spec terms. */
1563 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1564 }
1565
1566 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1567 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1568 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1569 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1570 [5] Name ::= NameStartChar (NameChar)* */
1571 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1572 {
1573 WCHAR *ptr;
1574 UINT start;
1575
1576 if (reader->resume[XmlReadResume_Name])
1577 {
1578 start = reader->resume[XmlReadResume_Name];
1579 ptr = reader_get_ptr(reader);
1580 }
1581 else
1582 {
1583 ptr = reader_get_ptr(reader);
1584 start = reader_get_cur(reader);
1585 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1586 }
1587
1588 while (is_namechar(*ptr))
1589 {
1590 reader_skipn(reader, 1);
1591 ptr = reader_get_ptr(reader);
1592 }
1593
1594 if (is_reader_pending(reader))
1595 {
1596 reader->resume[XmlReadResume_Name] = start;
1597 return E_PENDING;
1598 }
1599 else
1600 reader->resume[XmlReadResume_Name] = 0;
1601
1602 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1603 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1604
1605 return S_OK;
1606 }
1607
1608 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1609 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1610 {
1611 static const WCHAR xmlW[] = {'x','m','l'};
1612 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1613 strval name;
1614 WCHAR *ptr;
1615 HRESULT hr;
1616 UINT i;
1617
1618 hr = reader_parse_name(reader, &name);
1619 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1620
1621 /* now that we got name check for illegal content */
1622 if (strval_eq(reader, &name, &xmlval))
1623 return WC_E_LEADINGXML;
1624
1625 /* PITarget can't be a qualified name */
1626 ptr = reader_get_strptr(reader, &name);
1627 for (i = 0; i < name.len; i++)
1628 if (ptr[i] == ':')
1629 return i ? NC_E_NAMECOLON : WC_E_PI;
1630
1631 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1632 *target = name;
1633 return S_OK;
1634 }
1635
1636 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1637 static HRESULT reader_parse_pi(xmlreader *reader)
1638 {
1639 strval target;
1640 WCHAR *ptr;
1641 UINT start;
1642 HRESULT hr;
1643
1644 switch (reader->resumestate)
1645 {
1646 case XmlReadResumeState_Initial:
1647 /* skip '<?' */
1648 reader_skipn(reader, 2);
1649 reader_shrink(reader);
1650 reader->resumestate = XmlReadResumeState_PITarget;
1651 case XmlReadResumeState_PITarget:
1652 hr = reader_parse_pitarget(reader, &target);
1653 if (FAILED(hr)) return hr;
1654 reader_set_strvalue(reader, StringValue_LocalName, &target);
1655 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1656 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1657 reader->resumestate = XmlReadResumeState_PIBody;
1658 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1659 default:
1660 ;
1661 }
1662
1663 start = reader->resume[XmlReadResume_Body];
1664 ptr = reader_get_ptr(reader);
1665 while (*ptr)
1666 {
1667 if (ptr[0] == '?')
1668 {
1669 if (ptr[1] == '>')
1670 {
1671 UINT cur = reader_get_cur(reader);
1672 strval value;
1673
1674 /* strip all leading whitespace chars */
1675 while (start < cur)
1676 {
1677 ptr = reader_get_ptr2(reader, start);
1678 if (!is_wchar_space(*ptr)) break;
1679 start++;
1680 }
1681
1682 reader_init_strvalue(start, cur-start, &value);
1683
1684 /* skip '?>' */
1685 reader_skipn(reader, 2);
1686 TRACE("%s\n", debug_strval(reader, &value));
1687 reader->nodetype = XmlNodeType_ProcessingInstruction;
1688 reader->resumestate = XmlReadResumeState_Initial;
1689 reader->resume[XmlReadResume_Body] = 0;
1690 reader_set_strvalue(reader, StringValue_Value, &value);
1691 return S_OK;
1692 }
1693 }
1694
1695 reader_skipn(reader, 1);
1696 ptr = reader_get_ptr(reader);
1697 }
1698
1699 return S_OK;
1700 }
1701
1702 /* This one is used to parse significant whitespace nodes, like in Misc production */
1703 static HRESULT reader_parse_whitespace(xmlreader *reader)
1704 {
1705 switch (reader->resumestate)
1706 {
1707 case XmlReadResumeState_Initial:
1708 reader_shrink(reader);
1709 reader->resumestate = XmlReadResumeState_Whitespace;
1710 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1711 reader->nodetype = XmlNodeType_Whitespace;
1712 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1713 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1714 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1715 /* fallthrough */
1716 case XmlReadResumeState_Whitespace:
1717 {
1718 strval value;
1719 UINT start;
1720
1721 reader_skipspaces(reader);
1722 if (is_reader_pending(reader)) return S_OK;
1723
1724 start = reader->resume[XmlReadResume_Body];
1725 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1726 reader_set_strvalue(reader, StringValue_Value, &value);
1727 TRACE("%s\n", debug_strval(reader, &value));
1728 reader->resumestate = XmlReadResumeState_Initial;
1729 }
1730 default:
1731 ;
1732 }
1733
1734 return S_OK;
1735 }
1736
1737 /* [27] Misc ::= Comment | PI | S */
1738 static HRESULT reader_parse_misc(xmlreader *reader)
1739 {
1740 HRESULT hr = S_FALSE;
1741
1742 if (reader->resumestate != XmlReadResumeState_Initial)
1743 {
1744 hr = reader_more(reader);
1745 if (FAILED(hr)) return hr;
1746
1747 /* finish current node */
1748 switch (reader->resumestate)
1749 {
1750 case XmlReadResumeState_PITarget:
1751 case XmlReadResumeState_PIBody:
1752 return reader_parse_pi(reader);
1753 case XmlReadResumeState_Comment:
1754 return reader_parse_comment(reader);
1755 case XmlReadResumeState_Whitespace:
1756 return reader_parse_whitespace(reader);
1757 default:
1758 ERR("unknown resume state %d\n", reader->resumestate);
1759 }
1760 }
1761
1762 while (1)
1763 {
1764 const WCHAR *cur = reader_get_ptr(reader);
1765
1766 if (is_wchar_space(*cur))
1767 hr = reader_parse_whitespace(reader);
1768 else if (!reader_cmp(reader, commentW))
1769 hr = reader_parse_comment(reader);
1770 else if (!reader_cmp(reader, piW))
1771 hr = reader_parse_pi(reader);
1772 else
1773 break;
1774
1775 if (hr != S_FALSE) return hr;
1776 }
1777
1778 return hr;
1779 }
1780
1781 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1782 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1783 {
1784 WCHAR *cur = reader_get_ptr(reader), quote;
1785 UINT start;
1786
1787 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1788
1789 quote = *cur;
1790 reader_skipn(reader, 1);
1791
1792 cur = reader_get_ptr(reader);
1793 start = reader_get_cur(reader);
1794 while (is_char(*cur) && *cur != quote)
1795 {
1796 reader_skipn(reader, 1);
1797 cur = reader_get_ptr(reader);
1798 }
1799 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1800 if (*cur == quote) reader_skipn(reader, 1);
1801
1802 TRACE("%s\n", debug_strval(reader, literal));
1803 return S_OK;
1804 }
1805
1806 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1807 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1808 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1809 {
1810 WCHAR *cur = reader_get_ptr(reader), quote;
1811 UINT start;
1812
1813 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1814
1815 quote = *cur;
1816 reader_skipn(reader, 1);
1817
1818 start = reader_get_cur(reader);
1819 cur = reader_get_ptr(reader);
1820 while (is_pubchar(*cur) && *cur != quote)
1821 {
1822 reader_skipn(reader, 1);
1823 cur = reader_get_ptr(reader);
1824 }
1825 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1826 if (*cur == quote) reader_skipn(reader, 1);
1827
1828 TRACE("%s\n", debug_strval(reader, literal));
1829 return S_OK;
1830 }
1831
1832 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1833 static HRESULT reader_parse_externalid(xmlreader *reader)
1834 {
1835 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1836 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1837 struct reader_position position = reader->position;
1838 strval name, sys;
1839 HRESULT hr;
1840 int cnt;
1841
1842 if (!reader_cmp(reader, publicW)) {
1843 strval pub;
1844
1845 /* public id */
1846 reader_skipn(reader, 6);
1847 cnt = reader_skipspaces(reader);
1848 if (!cnt) return WC_E_WHITESPACE;
1849
1850 hr = reader_parse_pub_literal(reader, &pub);
1851 if (FAILED(hr)) return hr;
1852
1853 reader_init_cstrvalue(publicW, lstrlenW(publicW), &name);
1854 hr = reader_add_attr(reader, NULL, &name, NULL, &pub, &position, 0);
1855 if (FAILED(hr)) return hr;
1856
1857 cnt = reader_skipspaces(reader);
1858 if (!cnt) return S_OK;
1859
1860 /* optional system id */
1861 hr = reader_parse_sys_literal(reader, &sys);
1862 if (FAILED(hr)) return S_OK;
1863
1864 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1865 hr = reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1866 if (FAILED(hr)) return hr;
1867
1868 return S_OK;
1869 } else if (!reader_cmp(reader, systemW)) {
1870 /* system id */
1871 reader_skipn(reader, 6);
1872 cnt = reader_skipspaces(reader);
1873 if (!cnt) return WC_E_WHITESPACE;
1874
1875 hr = reader_parse_sys_literal(reader, &sys);
1876 if (FAILED(hr)) return hr;
1877
1878 reader_init_cstrvalue(systemW, lstrlenW(systemW), &name);
1879 return reader_add_attr(reader, NULL, &name, NULL, &sys, &position, 0);
1880 }
1881
1882 return S_FALSE;
1883 }
1884
1885 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1886 static HRESULT reader_parse_dtd(xmlreader *reader)
1887 {
1888 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1889 strval name;
1890 WCHAR *cur;
1891 HRESULT hr;
1892
1893 /* check if we have "<!DOCTYPE" */
1894 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1895 reader_shrink(reader);
1896
1897 /* DTD processing is not allowed by default */
1898 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1899
1900 reader_skipn(reader, 9);
1901 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1902
1903 /* name */
1904 hr = reader_parse_name(reader, &name);
1905 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1906
1907 reader_skipspaces(reader);
1908
1909 hr = reader_parse_externalid(reader);
1910 if (FAILED(hr)) return hr;
1911
1912 reader_skipspaces(reader);
1913
1914 cur = reader_get_ptr(reader);
1915 if (*cur != '>')
1916 {
1917 FIXME("internal subset parsing not implemented\n");
1918 return E_NOTIMPL;
1919 }
1920
1921 /* skip '>' */
1922 reader_skipn(reader, 1);
1923
1924 reader->nodetype = XmlNodeType_DocumentType;
1925 reader_set_strvalue(reader, StringValue_LocalName, &name);
1926 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1927
1928 return S_OK;
1929 }
1930
1931 /* [11 NS] LocalPart ::= NCName */
1932 static HRESULT reader_parse_local(xmlreader *reader, strval *local, BOOL check_for_separator)
1933 {
1934 WCHAR *ptr;
1935 UINT start;
1936
1937 if (reader->resume[XmlReadResume_Local])
1938 {
1939 start = reader->resume[XmlReadResume_Local];
1940 ptr = reader_get_ptr(reader);
1941 }
1942 else
1943 {
1944 ptr = reader_get_ptr(reader);
1945 start = reader_get_cur(reader);
1946 }
1947
1948 while (is_ncnamechar(*ptr))
1949 {
1950 reader_skipn(reader, 1);
1951 ptr = reader_get_ptr(reader);
1952 }
1953
1954 if (check_for_separator && *ptr == ':')
1955 return NC_E_QNAMECOLON;
1956
1957 if (is_reader_pending(reader))
1958 {
1959 reader->resume[XmlReadResume_Local] = start;
1960 return E_PENDING;
1961 }
1962 else
1963 reader->resume[XmlReadResume_Local] = 0;
1964
1965 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1966
1967 return S_OK;
1968 }
1969
1970 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1971 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1972 [9 NS] UnprefixedName ::= LocalPart
1973 [10 NS] Prefix ::= NCName */
1974 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1975 {
1976 WCHAR *ptr;
1977 UINT start;
1978 HRESULT hr;
1979
1980 if (reader->resume[XmlReadResume_Name])
1981 {
1982 start = reader->resume[XmlReadResume_Name];
1983 ptr = reader_get_ptr(reader);
1984 }
1985 else
1986 {
1987 ptr = reader_get_ptr(reader);
1988 start = reader_get_cur(reader);
1989 reader->resume[XmlReadResume_Name] = start;
1990 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1991 }
1992
1993 if (reader->resume[XmlReadResume_Local])
1994 {
1995 hr = reader_parse_local(reader, local, FALSE);
1996 if (FAILED(hr)) return hr;
1997
1998 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1999 local->start - reader->resume[XmlReadResume_Name] - 1,
2000 prefix);
2001 }
2002 else
2003 {
2004 /* skip prefix part */
2005 while (is_ncnamechar(*ptr))
2006 {
2007 reader_skipn(reader, 1);
2008 ptr = reader_get_ptr(reader);
2009 }
2010
2011 if (is_reader_pending(reader)) return E_PENDING;
2012
2013 /* got a qualified name */
2014 if (*ptr == ':')
2015 {
2016 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
2017
2018 /* skip ':' */
2019 reader_skipn(reader, 1);
2020 hr = reader_parse_local(reader, local, TRUE);
2021 if (FAILED(hr)) return hr;
2022 }
2023 else
2024 {
2025 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
2026 reader_init_strvalue(0, 0, prefix);
2027 }
2028 }
2029
2030 if (prefix->len)
2031 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
2032 else
2033 TRACE("ncname %s\n", debug_strval(reader, local));
2034
2035 reader_init_strvalue(prefix->len ? prefix->start : local->start,
2036 /* count ':' too */
2037 (prefix->len ? prefix->len + 1 : 0) + local->len,
2038 qname);
2039
2040 reader->resume[XmlReadResume_Name] = 0;
2041 reader->resume[XmlReadResume_Local] = 0;
2042
2043 return S_OK;
2044 }
2045
2046 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
2047 {
2048 static const WCHAR entltW[] = {'l','t'};
2049 static const WCHAR entgtW[] = {'g','t'};
2050 static const WCHAR entampW[] = {'a','m','p'};
2051 static const WCHAR entaposW[] = {'a','p','o','s'};
2052 static const WCHAR entquotW[] = {'q','u','o','t'};
2053 static const strval lt = { (WCHAR*)entltW, 2 };
2054 static const strval gt = { (WCHAR*)entgtW, 2 };
2055 static const strval amp = { (WCHAR*)entampW, 3 };
2056 static const strval apos = { (WCHAR*)entaposW, 4 };
2057 static const strval quot = { (WCHAR*)entquotW, 4 };
2058 WCHAR *str = reader_get_strptr(reader, name);
2059
2060 switch (*str)
2061 {
2062 case 'l':
2063 if (strval_eq(reader, name, &lt)) return '<';
2064 break;
2065 case 'g':
2066 if (strval_eq(reader, name, &gt)) return '>';
2067 break;
2068 case 'a':
2069 if (strval_eq(reader, name, &amp))
2070 return '&';
2071 else if (strval_eq(reader, name, &apos))
2072 return '\'';
2073 break;
2074 case 'q':
2075 if (strval_eq(reader, name, &quot)) return '\"';
2076 break;
2077 default:
2078 ;
2079 }
2080
2081 return 0;
2082 }
2083
2084 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2085 [67] Reference ::= EntityRef | CharRef
2086 [68] EntityRef ::= '&' Name ';' */
2087 static HRESULT reader_parse_reference(xmlreader *reader)
2088 {
2089 encoded_buffer *buffer = &reader->input->buffer->utf16;
2090 WCHAR *start = reader_get_ptr(reader), *ptr;
2091 UINT cur = reader_get_cur(reader);
2092 WCHAR ch = 0;
2093 int len;
2094
2095 /* skip '&' */
2096 reader_skipn(reader, 1);
2097 ptr = reader_get_ptr(reader);
2098
2099 if (*ptr == '#')
2100 {
2101 reader_skipn(reader, 1);
2102 ptr = reader_get_ptr(reader);
2103
2104 /* hex char or decimal */
2105 if (*ptr == 'x')
2106 {
2107 reader_skipn(reader, 1);
2108 ptr = reader_get_ptr(reader);
2109
2110 while (*ptr != ';')
2111 {
2112 if ((*ptr >= '0' && *ptr <= '9'))
2113 ch = ch*16 + *ptr - '0';
2114 else if ((*ptr >= 'a' && *ptr <= 'f'))
2115 ch = ch*16 + *ptr - 'a' + 10;
2116 else if ((*ptr >= 'A' && *ptr <= 'F'))
2117 ch = ch*16 + *ptr - 'A' + 10;
2118 else
2119 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2120 reader_skipn(reader, 1);
2121 ptr = reader_get_ptr(reader);
2122 }
2123 }
2124 else
2125 {
2126 while (*ptr != ';')
2127 {
2128 if ((*ptr >= '0' && *ptr <= '9'))
2129 {
2130 ch = ch*10 + *ptr - '0';
2131 reader_skipn(reader, 1);
2132 ptr = reader_get_ptr(reader);
2133 }
2134 else
2135 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2136 }
2137 }
2138
2139 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2140
2141 /* normalize */
2142 if (is_wchar_space(ch)) ch = ' ';
2143
2144 ptr = reader_get_ptr(reader);
2145 start = reader_get_ptr2(reader, cur);
2146 len = buffer->written - ((char *)ptr - buffer->data);
2147 memmove(start + 1, ptr + 1, len);
2148
2149 buffer->written -= (reader_get_cur(reader) - cur) * sizeof(WCHAR);
2150 buffer->cur = cur + 1;
2151
2152 *start = ch;
2153 }
2154 else
2155 {
2156 strval name;
2157 HRESULT hr;
2158
2159 hr = reader_parse_name(reader, &name);
2160 if (FAILED(hr)) return hr;
2161
2162 ptr = reader_get_ptr(reader);
2163 if (*ptr != ';') return WC_E_SEMICOLON;
2164
2165 /* predefined entities resolve to a single character */
2166 ch = get_predefined_entity(reader, &name);
2167 if (ch)
2168 {
2169 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2170 memmove(start+1, ptr+1, len);
2171 buffer->cur = cur + 1;
2172 buffer->written -= (ptr - start) * sizeof(WCHAR);
2173
2174 *start = ch;
2175 }
2176 else
2177 {
2178 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2179 return WC_E_UNDECLAREDENTITY;
2180 }
2181
2182 }
2183
2184 return S_OK;
2185 }
2186
2187 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2188 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2189 {
2190 WCHAR *ptr, quote;
2191 UINT start;
2192
2193 ptr = reader_get_ptr(reader);
2194
2195 /* skip opening quote */
2196 quote = *ptr;
2197 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2198 reader_skipn(reader, 1);
2199
2200 ptr = reader_get_ptr(reader);
2201 start = reader_get_cur(reader);
2202 while (*ptr)
2203 {
2204 if (*ptr == '<') return WC_E_LESSTHAN;
2205
2206 if (*ptr == quote)
2207 {
2208 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2209 /* skip closing quote */
2210 reader_skipn(reader, 1);
2211 return S_OK;
2212 }
2213
2214 if (*ptr == '&')
2215 {
2216 HRESULT hr = reader_parse_reference(reader);
2217 if (FAILED(hr)) return hr;
2218 }
2219 else
2220 {
2221 /* replace all whitespace chars with ' ' */
2222 if (is_wchar_space(*ptr)) *ptr = ' ';
2223 reader_skipn(reader, 1);
2224 }
2225 ptr = reader_get_ptr(reader);
2226 }
2227
2228 return WC_E_QUOTE;
2229 }
2230
2231 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2232 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2233 [3 NS] DefaultAttName ::= 'xmlns'
2234 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2235 static HRESULT reader_parse_attribute(xmlreader *reader)
2236 {
2237 struct reader_position position = reader->position;
2238 strval prefix, local, qname, value;
2239 enum attribute_flags flags = 0;
2240 HRESULT hr;
2241
2242 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2243 if (FAILED(hr)) return hr;
2244
2245 if (strval_eq(reader, &prefix, &strval_xmlns))
2246 flags |= ATTRIBUTE_NS_DEFINITION;
2247
2248 if (strval_eq(reader, &qname, &strval_xmlns))
2249 flags |= ATTRIBUTE_DEFAULT_NS_DEFINITION;
2250
2251 hr = reader_parse_eq(reader);
2252 if (FAILED(hr)) return hr;
2253
2254 hr = reader_parse_attvalue(reader, &value);
2255 if (FAILED(hr)) return hr;
2256
2257 if (flags & (ATTRIBUTE_NS_DEFINITION | ATTRIBUTE_DEFAULT_NS_DEFINITION))
2258 reader_push_ns(reader, &local, &value, !!(flags & ATTRIBUTE_DEFAULT_NS_DEFINITION));
2259
2260 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2261 return reader_add_attr(reader, &prefix, &local, &qname, &value, &position, flags);
2262 }
2263
2264 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2265 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2266 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname)
2267 {
2268 struct reader_position position = reader->position;
2269 HRESULT hr;
2270
2271 hr = reader_parse_qname(reader, prefix, local, qname);
2272 if (FAILED(hr)) return hr;
2273
2274 for (;;)
2275 {
2276 static const WCHAR endW[] = {'/','>',0};
2277
2278 reader_skipspaces(reader);
2279
2280 /* empty element */
2281 if ((reader->is_empty_element = !reader_cmp(reader, endW)))
2282 {
2283 struct element *element = &reader->empty_element;
2284
2285 /* skip '/>' */
2286 reader_skipn(reader, 2);
2287
2288 reader_free_strvalued(reader, &element->qname);
2289 reader_free_strvalued(reader, &element->localname);
2290
2291 element->prefix = *prefix;
2292 reader_strvaldup(reader, qname, &element->qname);
2293 reader_strvaldup(reader, local, &element->localname);
2294 element->position = position;
2295 reader_mark_ns_nodes(reader, element);
2296 return S_OK;
2297 }
2298
2299 /* got a start tag */
2300 if (!reader_cmp(reader, gtW))
2301 {
2302 /* skip '>' */
2303 reader_skipn(reader, 1);
2304 return reader_push_element(reader, prefix, local, qname, &position);
2305 }
2306
2307 hr = reader_parse_attribute(reader);
2308 if (FAILED(hr)) return hr;
2309 }
2310
2311 return S_OK;
2312 }
2313
2314 /* [39] element ::= EmptyElemTag | STag content ETag */
2315 static HRESULT reader_parse_element(xmlreader *reader)
2316 {
2317 HRESULT hr;
2318
2319 switch (reader->resumestate)
2320 {
2321 case XmlReadResumeState_Initial:
2322 /* check if we are really on element */
2323 if (reader_cmp(reader, ltW)) return S_FALSE;
2324
2325 /* skip '<' */
2326 reader_skipn(reader, 1);
2327
2328 reader_shrink(reader);
2329 reader->resumestate = XmlReadResumeState_STag;
2330 case XmlReadResumeState_STag:
2331 {
2332 strval qname, prefix, local;
2333
2334 /* this handles empty elements too */
2335 hr = reader_parse_stag(reader, &prefix, &local, &qname);
2336 if (FAILED(hr)) return hr;
2337
2338 /* FIXME: need to check for defined namespace to reject invalid prefix */
2339
2340 /* if we got empty element and stack is empty go straight to Misc */
2341 if (reader->is_empty_element && list_empty(&reader->elements))
2342 reader->instate = XmlReadInState_MiscEnd;
2343 else
2344 reader->instate = XmlReadInState_Content;
2345
2346 reader->nodetype = XmlNodeType_Element;
2347 reader->resumestate = XmlReadResumeState_Initial;
2348 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2349 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2350 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
2351 break;
2352 }
2353 default:
2354 hr = E_FAIL;
2355 }
2356
2357 return hr;
2358 }
2359
2360 /* [13 NS] ETag ::= '</' QName S? '>' */
2361 static HRESULT reader_parse_endtag(xmlreader *reader)
2362 {
2363 struct reader_position position;
2364 strval prefix, local, qname;
2365 struct element *element;
2366 HRESULT hr;
2367
2368 /* skip '</' */
2369 reader_skipn(reader, 2);
2370
2371 position = reader->position;
2372 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2373 if (FAILED(hr)) return hr;
2374
2375 reader_skipspaces(reader);
2376
2377 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2378
2379 /* skip '>' */
2380 reader_skipn(reader, 1);
2381
2382 /* Element stack should never be empty at this point, cause we shouldn't get to
2383 content parsing if it's empty. */
2384 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2385 if (!strval_eq(reader, &element->qname, &qname)) return WC_E_ELEMENTMATCH;
2386
2387 /* update position stored for start tag, we won't be using it */
2388 element->position = position;
2389
2390 reader->nodetype = XmlNodeType_EndElement;
2391 reader->is_empty_element = FALSE;
2392 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2393
2394 return S_OK;
2395 }
2396
2397 /* [18] CDSect ::= CDStart CData CDEnd
2398 [19] CDStart ::= '<![CDATA['
2399 [20] CData ::= (Char* - (Char* ']]>' Char*))
2400 [21] CDEnd ::= ']]>' */
2401 static HRESULT reader_parse_cdata(xmlreader *reader)
2402 {
2403 WCHAR *ptr;
2404 UINT start;
2405
2406 if (reader->resumestate == XmlReadResumeState_CDATA)
2407 {
2408 start = reader->resume[XmlReadResume_Body];
2409 ptr = reader_get_ptr(reader);
2410 }
2411 else
2412 {
2413 /* skip markup '<![CDATA[' */
2414 reader_skipn(reader, 9);
2415 reader_shrink(reader);
2416 ptr = reader_get_ptr(reader);
2417 start = reader_get_cur(reader);
2418 reader->nodetype = XmlNodeType_CDATA;
2419 reader->resume[XmlReadResume_Body] = start;
2420 reader->resumestate = XmlReadResumeState_CDATA;
2421 reader_set_strvalue(reader, StringValue_Value, NULL);
2422 }
2423
2424 while (*ptr)
2425 {
2426 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2427 {
2428 strval value;
2429
2430 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2431
2432 /* skip ']]>' */
2433 reader_skipn(reader, 3);
2434 TRACE("%s\n", debug_strval(reader, &value));
2435
2436 reader_set_strvalue(reader, StringValue_Value, &value);
2437 reader->resume[XmlReadResume_Body] = 0;
2438 reader->resumestate = XmlReadResumeState_Initial;
2439 return S_OK;
2440 }
2441 else
2442 {
2443 reader_skipn(reader, 1);
2444 ptr++;
2445 }
2446 }
2447
2448 return S_OK;
2449 }
2450
2451 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2452 static HRESULT reader_parse_chardata(xmlreader *reader)
2453 {
2454 struct reader_position position;
2455 WCHAR *ptr;
2456 UINT start;
2457
2458 if (reader->resumestate == XmlReadResumeState_CharData)
2459 {
2460 start = reader->resume[XmlReadResume_Body];
2461 ptr = reader_get_ptr(reader);
2462 }
2463 else
2464 {
2465 reader_shrink(reader);
2466 ptr = reader_get_ptr(reader);
2467 start = reader_get_cur(reader);
2468 /* There's no text */
2469 if (!*ptr || *ptr == '<') return S_OK;
2470 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2471 reader->resume[XmlReadResume_Body] = start;
2472 reader->resumestate = XmlReadResumeState_CharData;
2473 reader_set_strvalue(reader, StringValue_Value, NULL);
2474 }
2475
2476 position = reader->position;
2477 while (*ptr)
2478 {
2479 static const WCHAR ampW[] = {'&',0};
2480
2481 /* CDATA closing sequence ']]>' is not allowed */
2482 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2483 return WC_E_CDSECTEND;
2484
2485 /* Found next markup part */
2486 if (ptr[0] == '<')
2487 {
2488 strval value;
2489
2490 reader->empty_element.position = position;
2491 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2492 reader_set_strvalue(reader, StringValue_Value, &value);
2493 reader->resume[XmlReadResume_Body] = 0;
2494 reader->resumestate = XmlReadResumeState_Initial;
2495 return S_OK;
2496 }
2497
2498 /* this covers a case when text has leading whitespace chars */
2499 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2500
2501 if (!reader_cmp(reader, ampW))
2502 reader_parse_reference(reader);
2503 else
2504 reader_skipn(reader, 1);
2505
2506 ptr = reader_get_ptr(reader);
2507 }
2508
2509 return S_OK;
2510 }
2511
2512 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2513 static HRESULT reader_parse_content(xmlreader *reader)
2514 {
2515 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2516 static const WCHAR etagW[] = {'<','/',0};
2517
2518 if (reader->resumestate != XmlReadResumeState_Initial)
2519 {
2520 switch (reader->resumestate)
2521 {
2522 case XmlReadResumeState_CDATA:
2523 return reader_parse_cdata(reader);
2524 case XmlReadResumeState_Comment:
2525 return reader_parse_comment(reader);
2526 case XmlReadResumeState_PIBody:
2527 case XmlReadResumeState_PITarget:
2528 return reader_parse_pi(reader);
2529 case XmlReadResumeState_CharData:
2530 return reader_parse_chardata(reader);
2531 default:
2532 ERR("unknown resume state %d\n", reader->resumestate);
2533 }
2534 }
2535
2536 reader_shrink(reader);
2537
2538 /* handle end tag here, it indicates end of content as well */
2539 if (!reader_cmp(reader, etagW))
2540 return reader_parse_endtag(reader);
2541
2542 if (!reader_cmp(reader, commentW))
2543 return reader_parse_comment(reader);
2544
2545 if (!reader_cmp(reader, piW))
2546 return reader_parse_pi(reader);
2547
2548 if (!reader_cmp(reader, cdstartW))
2549 return reader_parse_cdata(reader);
2550
2551 if (!reader_cmp(reader, ltW))
2552 return reader_parse_element(reader);
2553
2554 /* what's left must be CharData */
2555 return reader_parse_chardata(reader);
2556 }
2557
2558 static HRESULT reader_parse_nextnode(xmlreader *reader)
2559 {
2560 XmlNodeType nodetype = reader_get_nodetype(reader);
2561 HRESULT hr;
2562
2563 if (!is_reader_pending(reader))
2564 {
2565 reader->chunk_read_off = 0;
2566 reader_clear_attrs(reader);
2567 }
2568
2569 /* When moving from EndElement or empty element, pop its own namespace definitions */
2570 switch (nodetype)
2571 {
2572 case XmlNodeType_Attribute:
2573 reader_dec_depth(reader);
2574 /* fallthrough */
2575 case XmlNodeType_Element:
2576 if (reader->is_empty_element)
2577 reader_pop_ns_nodes(reader, &reader->empty_element);
2578 else if (FAILED(hr = reader_inc_depth(reader)))
2579 return hr;
2580 break;
2581 case XmlNodeType_EndElement:
2582 reader_pop_element(reader);
2583 reader_dec_depth(reader);
2584 break;
2585 default:
2586 ;
2587 }
2588
2589 for (;;)
2590 {
2591 switch (reader->instate)
2592 {
2593 /* if it's a first call for a new input we need to detect stream encoding */
2594 case XmlReadInState_Initial:
2595 {
2596 xml_encoding enc;
2597
2598 hr = readerinput_growraw(reader->input);
2599 if (FAILED(hr)) return hr;
2600
2601 reader->position.line_number = 1;
2602 reader->position.line_position = 1;
2603
2604 /* try to detect encoding by BOM or data and set input code page */
2605 hr = readerinput_detectencoding(reader->input, &enc);
2606 TRACE("detected encoding %s, 0x%08x\n", enc == XmlEncoding_Unknown ? "(unknown)" :
2607 debugstr_w(xml_encoding_map[enc].name), hr);
2608 if (FAILED(hr)) return hr;
2609
2610 /* always switch first time cause we have to put something in */
2611 readerinput_switchencoding(reader->input, enc);
2612
2613 /* parse xml declaration */
2614 hr = reader_parse_xmldecl(reader);
2615 if (FAILED(hr)) return hr;
2616
2617 readerinput_shrinkraw(reader->input, -1);
2618 reader->instate = XmlReadInState_Misc_DTD;
2619 if (hr == S_OK) return hr;
2620 }
2621 break;
2622 case XmlReadInState_Misc_DTD:
2623 hr = reader_parse_misc(reader);
2624 if (FAILED(hr)) return hr;
2625
2626 if (hr == S_FALSE)
2627 reader->instate = XmlReadInState_DTD;
2628 else
2629 return hr;
2630 break;
2631 case XmlReadInState_DTD:
2632 hr = reader_parse_dtd(reader);
2633 if (FAILED(hr)) return hr;
2634
2635 if (hr == S_OK)
2636 {
2637 reader->instate = XmlReadInState_DTD_Misc;
2638 return hr;
2639 }
2640 else
2641 reader->instate = XmlReadInState_Element;
2642 break;
2643 case XmlReadInState_DTD_Misc:
2644 hr = reader_parse_misc(reader);
2645 if (FAILED(hr)) return hr;
2646
2647 if (hr == S_FALSE)
2648 reader->instate = XmlReadInState_Element;
2649 else
2650 return hr;
2651 break;
2652 case XmlReadInState_Element:
2653 return reader_parse_element(reader);
2654 case XmlReadInState_Content:
2655 return reader_parse_content(reader);
2656 case XmlReadInState_MiscEnd:
2657 hr = reader_parse_misc(reader);
2658 if (hr != S_FALSE) return hr;
2659
2660 if (*reader_get_ptr(reader))
2661 {
2662 WARN("found garbage in the end of XML\n");
2663 return WC_E_SYNTAX;
2664 }
2665
2666 reader->instate = XmlReadInState_Eof;
2667 reader->state = XmlReadState_EndOfFile;
2668 reader->nodetype = XmlNodeType_None;
2669 return hr;
2670 case XmlReadInState_Eof:
2671 return S_FALSE;
2672 default:
2673 FIXME("internal state %d not handled\n", reader->instate);
2674 return E_NOTIMPL;
2675 }
2676 }
2677
2678 return E_NOTIMPL;
2679 }
2680
2681 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2682 {
2683 xmlreader *This = impl_from_IXmlReader(iface);
2684
2685 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2686
2687 if (IsEqualGUID(riid, &IID_IUnknown) ||
2688 IsEqualGUID(riid, &IID_IXmlReader))
2689 {
2690 *ppvObject = iface;
2691 }
2692 else
2693 {
2694 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2695 *ppvObject = NULL;
2696 return E_NOINTERFACE;
2697 }
2698
2699 IXmlReader_AddRef(iface);
2700
2701 return S_OK;
2702 }
2703
2704 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2705 {
2706 xmlreader *This = impl_from_IXmlReader(iface);
2707 ULONG ref = InterlockedIncrement(&This->ref);
2708 TRACE("(%p)->(%d)\n", This, ref);
2709 return ref;
2710 }
2711
2712 static void reader_clear_ns(xmlreader *reader)
2713 {
2714 struct ns *ns, *ns2;
2715
2716 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2717 list_remove(&ns->entry);
2718 reader_free_strvalued(reader, &ns->prefix);
2719 reader_free_strvalued(reader, &ns->uri);
2720 reader_free(reader, ns);
2721 }
2722
2723 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2724 list_remove(&ns->entry);
2725 reader_free_strvalued(reader, &ns->uri);
2726 reader_free(reader, ns);
2727 }
2728 }
2729
2730 static void reader_reset_parser(xmlreader *reader)
2731 {
2732 reader->position.line_number = 0;
2733 reader->position.line_position = 0;
2734
2735 reader_clear_elements(reader);
2736 reader_clear_attrs(reader);
2737 reader_clear_ns(reader);
2738 reader_free_strvalues(reader);
2739
2740 reader->depth = 0;
2741 reader->nodetype = XmlNodeType_None;
2742 reader->resumestate = XmlReadResumeState_Initial;
2743 memset(reader->resume, 0, sizeof(reader->resume));
2744 reader->is_empty_element = FALSE;
2745 }
2746
2747 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2748 {
2749 xmlreader *This = impl_from_IXmlReader(iface);
2750 LONG ref = InterlockedDecrement(&This->ref);
2751
2752 TRACE("(%p)->(%d)\n", This, ref);
2753
2754 if (ref == 0)
2755 {
2756 IMalloc *imalloc = This->imalloc;
2757 reader_reset_parser(This);
2758 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2759 if (This->resolver) IXmlResolver_Release(This->resolver);
2760 if (This->mlang) IUnknown_Release(This->mlang);
2761 reader_free(This, This);
2762 if (imalloc) IMalloc_Release(imalloc);
2763 }
2764
2765 return ref;
2766 }
2767
2768 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2769 {
2770 xmlreader *This = impl_from_IXmlReader(iface);
2771 IXmlReaderInput *readerinput;
2772 HRESULT hr;
2773
2774 TRACE("(%p)->(%p)\n", This, input);
2775
2776 if (This->input)
2777 {
2778 readerinput_release_stream(This->input);
2779 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2780 This->input = NULL;
2781 }
2782
2783 reader_reset_parser(This);
2784
2785 /* just reset current input */
2786 if (!input)
2787 {
2788 This->state = XmlReadState_Initial;
2789 return S_OK;
2790 }
2791
2792 /* now try IXmlReaderInput, ISequentialStream, IStream */
2793 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2794 if (hr == S_OK)
2795 {
2796 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2797 This->input = impl_from_IXmlReaderInput(readerinput);
2798 else
2799 {
2800 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2801 readerinput, readerinput->lpVtbl);
2802 IUnknown_Release(readerinput);
2803 return E_FAIL;
2804
2805 }
2806 }
2807
2808 if (hr != S_OK || !readerinput)
2809 {
2810 /* create IXmlReaderInput basing on supplied interface */
2811 hr = CreateXmlReaderInputWithEncodingName(input,
2812 This->imalloc, NULL, FALSE, NULL, &readerinput);
2813 if (hr != S_OK) return hr;
2814 This->input = impl_from_IXmlReaderInput(readerinput);
2815 }
2816
2817 /* set stream for supplied IXmlReaderInput */
2818 hr = readerinput_query_for_stream(This->input);
2819 if (hr == S_OK)
2820 {
2821 This->state = XmlReadState_Initial;
2822 This->instate = XmlReadInState_Initial;
2823 }
2824 return hr;
2825 }
2826
2827 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2828 {
2829 xmlreader *This = impl_from_IXmlReader(iface);
2830
2831 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2832
2833 if (!value) return E_INVALIDARG;
2834
2835 switch (property)
2836 {
2837 case XmlReaderProperty_MultiLanguage:
2838 *value = (LONG_PTR)This->mlang;
2839 if (This->mlang)
2840 IUnknown_AddRef(This->mlang);
2841 break;
2842 case XmlReaderProperty_XmlResolver:
2843 *value = (LONG_PTR)This->resolver;
2844 if (This->resolver)
2845 IXmlResolver_AddRef(This->resolver);
2846 break;
2847 case XmlReaderProperty_DtdProcessing:
2848 *value = This->dtdmode;
2849 break;
2850 case XmlReaderProperty_ReadState:
2851 *value = This->state;
2852 break;
2853 case XmlReaderProperty_MaxElementDepth:
2854 *value = This->max_depth;
2855 break;
2856 default:
2857 FIXME("Unimplemented property (%u)\n", property);
2858 return E_NOTIMPL;
2859 }
2860
2861 return S_OK;
2862 }
2863
2864 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2865 {
2866 xmlreader *This = impl_from_IXmlReader(iface);
2867
2868 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2869
2870 switch (property)
2871 {
2872 case XmlReaderProperty_MultiLanguage:
2873 if (This->mlang)
2874 IUnknown_Release(This->mlang);
2875 This->mlang = (IUnknown*)value;
2876 if (This->mlang)
2877 IUnknown_AddRef(This->mlang);
2878 if (This->mlang)
2879 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2880 break;
2881 case XmlReaderProperty_XmlResolver:
2882 if (This->resolver)
2883 IXmlResolver_Release(This->resolver);
2884 This->resolver = (IXmlResolver*)value;
2885 if (This->resolver)
2886 IXmlResolver_AddRef(This->resolver);
2887 break;
2888 case XmlReaderProperty_DtdProcessing:
2889 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2890 This->dtdmode = value;
2891 break;
2892 case XmlReaderProperty_MaxElementDepth:
2893 This->max_depth = value;
2894 break;
2895 default:
2896 FIXME("Unimplemented property (%u)\n", property);
2897 return E_NOTIMPL;
2898 }
2899
2900 return S_OK;
2901 }
2902
2903 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2904 {
2905 xmlreader *This = impl_from_IXmlReader(iface);
2906 XmlNodeType oldtype = This->nodetype;
2907 XmlNodeType type;
2908 HRESULT hr;
2909
2910 TRACE("(%p)->(%p)\n", This, nodetype);
2911
2912 if (!nodetype)
2913 nodetype = &type;
2914
2915 switch (This->state)
2916 {
2917 case XmlReadState_Closed:
2918 hr = S_FALSE;
2919 break;
2920 case XmlReadState_Error:
2921 hr = This->error;
2922 break;
2923 default:
2924 hr = reader_parse_nextnode(This);
2925 if (SUCCEEDED(hr) && oldtype == XmlNodeType_None && This->nodetype != oldtype)
2926 This->state = XmlReadState_Interactive;
2927
2928 if (FAILED(hr))
2929 {
2930 This->state = XmlReadState_Error;
2931 This->nodetype = XmlNodeType_None;
2932 This->depth = 0;
2933 This->error = hr;
2934 }
2935 }
2936
2937 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2938 *nodetype = This->nodetype;
2939
2940 return hr;
2941 }
2942
2943 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2944 {
2945 xmlreader *This = impl_from_IXmlReader(iface);
2946
2947 TRACE("(%p)->(%p)\n", This, node_type);
2948
2949 if (!node_type)
2950 return E_INVALIDARG;
2951
2952 *node_type = reader_get_nodetype(This);
2953 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2954 }
2955
2956 static void reader_set_current_attribute(xmlreader *reader, struct attribute *attr)
2957 {
2958 reader->attr = attr;
2959 reader->chunk_read_off = 0;
2960 reader_set_strvalue(reader, StringValue_Prefix, &attr->prefix);
2961 reader_set_strvalue(reader, StringValue_QualifiedName, &attr->qname);
2962 reader_set_strvalue(reader, StringValue_Value, &attr->value);
2963 }
2964
2965 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2966 {
2967 if (!reader->attr_count)
2968 return S_FALSE;
2969
2970 if (!reader->attr)
2971 reader_inc_depth(reader);
2972
2973 reader_set_current_attribute(reader, LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry));
2974
2975 return S_OK;
2976 }
2977
2978 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2979 {
2980 xmlreader *This = impl_from_IXmlReader(iface);
2981
2982 TRACE("(%p)\n", This);
2983
2984 return reader_move_to_first_attribute(This);
2985 }
2986
2987 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2988 {
2989 xmlreader *This = impl_from_IXmlReader(iface);
2990 const struct list *next;
2991
2992 TRACE("(%p)\n", This);
2993
2994 if (!This->attr_count) return S_FALSE;
2995
2996 if (!This->attr)
2997 return reader_move_to_first_attribute(This);
2998
2999 next = list_next(&This->attrs, &This->attr->entry);
3000 if (next)
3001 reader_set_current_attribute(This, LIST_ENTRY(next, struct attribute, entry));
3002
3003 return next ? S_OK : S_FALSE;
3004 }
3005
3006 static void reader_get_attribute_ns_uri(xmlreader *reader, struct attribute *attr, const WCHAR **uri, UINT *len)
3007 {
3008 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3009 '2','0','0','0','/','x','m','l','n','s','/',0};
3010 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3011 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3012
3013 /* Check for reserved prefixes first */
3014 if ((strval_eq(reader, &attr->prefix, &strval_empty) && strval_eq(reader, &attr->localname, &strval_xmlns)) ||
3015 strval_eq(reader, &attr->prefix, &strval_xmlns))
3016 {
3017 *uri = xmlns_uriW;
3018 *len = ARRAY_SIZE(xmlns_uriW) - 1;
3019 }
3020 else if (strval_eq(reader, &attr->prefix, &strval_xml))
3021 {
3022 *uri = xml_uriW;
3023 *len = ARRAY_SIZE(xml_uriW) - 1;
3024 }
3025 else
3026 {
3027 *uri = NULL;
3028 *len = 0;
3029 }
3030
3031 if (!*uri)
3032 {
3033 struct ns *ns;
3034
3035 if ((ns = reader_lookup_ns(reader, &attr->prefix)))
3036 {
3037 *uri = ns->uri.str;
3038 *len = ns->uri.len;
3039 }
3040 else
3041 {
3042 *uri = emptyW;
3043 *len = 0;
3044 }
3045 }
3046 }
3047
3048 static void reader_get_attribute_local_name(xmlreader *reader, struct attribute *attr, const WCHAR **name, UINT *len)
3049 {
3050 if (attr->flags & ATTRIBUTE_DEFAULT_NS_DEFINITION)
3051 {
3052 *name = xmlnsW;
3053 *len = 5;
3054 }
3055 else if (attr->flags & ATTRIBUTE_NS_DEFINITION)
3056 {
3057 const struct ns *ns = reader_lookup_ns(reader, &attr->localname);
3058 *name = ns->prefix.str;
3059 *len = ns->prefix.len;
3060 }
3061 else
3062 {
3063 *name = attr->localname.str;
3064 *len = attr->localname.len;
3065 }
3066 }
3067
3068 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
3069 const WCHAR *local_name, const WCHAR *namespace_uri)
3070 {
3071 xmlreader *This = impl_from_IXmlReader(iface);
3072 UINT target_name_len, target_uri_len;
3073 struct attribute *attr;
3074
3075 TRACE("(%p)->(%s %s)\n", This, debugstr_w(local_name), debugstr_w(namespace_uri));
3076
3077 if (!local_name)
3078 return E_INVALIDARG;
3079
3080 if (!This->attr_count)
3081 return S_FALSE;
3082
3083 if (!namespace_uri)
3084 namespace_uri = emptyW;
3085
3086 target_name_len = lstrlenW(local_name);
3087 target_uri_len = lstrlenW(namespace_uri);
3088
3089 LIST_FOR_EACH_ENTRY(attr, &This->attrs, struct attribute, entry)
3090 {
3091 UINT name_len, uri_len;
3092 const WCHAR *name, *uri;
3093
3094 reader_get_attribute_local_name(This, attr, &name, &name_len);
3095 reader_get_attribute_ns_uri(This, attr, &uri, &uri_len);
3096
3097 if (name_len == target_name_len && uri_len == target_uri_len &&
3098 !wcscmp(name, local_name) && !wcscmp(uri, namespace_uri))
3099 {
3100 reader_set_current_attribute(This, attr);
3101 return S_OK;
3102 }
3103 }
3104
3105 return S_FALSE;
3106 }
3107
3108 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
3109 {
3110 xmlreader *This = impl_from_IXmlReader(iface);
3111
3112 TRACE("(%p)\n", This);
3113
3114 if (!This->attr_count) return S_FALSE;
3115
3116 if (This->attr)
3117 reader_dec_depth(This);
3118
3119 This->attr = NULL;
3120
3121 /* FIXME: support other node types with 'attributes' like DTD */
3122 if (This->is_empty_element) {
3123 reader_set_strvalue(This, StringValue_Prefix, &This->empty_element.prefix);
3124 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
3125 }
3126 else {
3127 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
3128 if (element) {
3129 reader_set_strvalue(This, StringValue_Prefix, &element->prefix);
3130 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
3131 }
3132 }
3133 This->chunk_read_off = 0;
3134 reader_set_strvalue(This, StringValue_Value, &strval_empty);
3135
3136 return S_OK;
3137 }
3138
3139 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
3140 {
3141 xmlreader *This = impl_from_IXmlReader(iface);
3142 struct attribute *attribute = This->attr;
3143 struct element *element;
3144 UINT length;
3145
3146 TRACE("(%p)->(%p %p)\n", This, name, len);
3147
3148 if (!len)
3149 len = &length;
3150
3151 switch (reader_get_nodetype(This))
3152 {
3153 case XmlNodeType_Text:
3154 case XmlNodeType_CDATA:
3155 case XmlNodeType_Comment:
3156 case XmlNodeType_Whitespace: