81aa7f2dfa79056cc312eebd2c0a1e784c23d50d
[reactos.git] / reactos / dll / win32 / xmllite / reader.c
1 /*
2 * IXmlReader implementation
3 *
4 * Copyright 2010, 2012-2013, 2016 Nikolay Sivov
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
19 */
20
21 #include "xmllite_private.h"
22
23 #include <stdio.h>
24
25 #include <wine/list.h>
26 #include <wine/unicode.h>
27
28 /* IID_IXmlReaderInput is not defined in public headers, so it is defined here */
29 DEFINE_GUID(IID_IXmlReaderInput, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
30
/* Internal parser state, stored in xmlreader.instate.
   The only transition visible in this part of the file is reader_pop_element()
   switching to XmlReadInState_MiscEnd once the element stack becomes empty. */
31 typedef enum
32 {
33 XmlReadInState_Initial,
34 XmlReadInState_XmlDecl,
35 XmlReadInState_Misc_DTD,
36 XmlReadInState_DTD,
37 XmlReadInState_DTD_Misc,
38 XmlReadInState_Element,
39 XmlReadInState_Content,
40 XmlReadInState_MiscEnd, /* optional Misc at the end of a document */
41 XmlReadInState_Eof
42 } XmlReaderInternalState;
43
44 /* This state denotes where parsing was interrupted by an input problem.
   The reader resumes parsing using this information; it is stored in
   xmlreader.resumestate. */
46 typedef enum
47 {
48 XmlReadResumeState_Initial,
49 XmlReadResumeState_PITarget,
50 XmlReadResumeState_PIBody,
51 XmlReadResumeState_CDATA,
52 XmlReadResumeState_Comment,
53 XmlReadResumeState_STag,
54 XmlReadResumeState_CharData,
55 XmlReadResumeState_Whitespace
56 } XmlReaderResumeState;
57
58 /* Index of a saved input offset used to resume from a particular input position;
   these values index xmlreader.resume[], XmlReadResume_Last being its size. */
59 typedef enum
60 {
61 XmlReadResume_Name, /* PITarget, name for NCName, prefix for QName */
62 XmlReadResume_Local, /* local for QName */
63 XmlReadResume_Body, /* PI body, comment text, CDATA text, CharData text */
64 XmlReadResume_Last
65 } XmlReaderResume;
66
/* Indices into xmlreader.strvalues[]; StringValue_Last is the array size.
   reader_set_strvalue() defers allocation for StringValue_Value only. */
67 typedef enum
68 {
69 StringValue_LocalName,
70 StringValue_Prefix,
71 StringValue_QualifiedName,
72 StringValue_Value,
73 StringValue_Last
74 } XmlReaderStringValue;
75
/* Encoding names referenced by xml_encoding_map[]. */
76 static const WCHAR utf16W[] = {'U','T','F','-','1','6',0};
77 static const WCHAR utf8W[] = {'U','T','F','-','8',0};
78
/* Null-terminated literal tokens. quoteW and dblquoteW are matched against the
   input with reader_cmp() in reader_parse_versioninfo(); the remaining ones are
   not used in this part of the file. */
79 static const WCHAR dblquoteW[] = {'\"',0};
80 static const WCHAR quoteW[] = {'\'',0};
81 static const WCHAR ltW[] = {'<',0};
82 static const WCHAR gtW[] = {'>',0};
83 static const WCHAR commentW[] = {'<','!','-','-',0};
84 static const WCHAR piW[] = {'<','?',0};
85
86 static const char *debugstr_nodetype(XmlNodeType nodetype)
87 {
88 static const char * const type_names[] =
89 {
90 "None",
91 "Element",
92 "Attribute",
93 "Text",
94 "CDATA",
95 "",
96 "",
97 "ProcessingInstruction",
98 "Comment",
99 "",
100 "DocumentType",
101 "",
102 "",
103 "Whitespace",
104 "",
105 "EndElement",
106 "",
107 "XmlDeclaration"
108 };
109
110 if (nodetype > _XmlNodeType_Last)
111 return wine_dbg_sprintf("unknown type=%d", nodetype);
112
113 return type_names[nodetype];
114 }
115
116 static const char *debugstr_reader_prop(XmlReaderProperty prop)
117 {
118 static const char * const prop_names[] =
119 {
120 "MultiLanguage",
121 "ConformanceLevel",
122 "RandomAccess",
123 "XmlResolver",
124 "DtdProcessing",
125 "ReadState",
126 "MaxElementDepth",
127 "MaxEntityExpansion"
128 };
129
130 if (prop > _XmlReaderProperty_Last)
131 return wine_dbg_sprintf("unknown property=%d", prop);
132
133 return prop_names[prop];
134 }
135
/* One supported encoding: its name, the xml_encoding value and the code page
   used for conversion. */
136 struct xml_encoding_data
137 {
138 const WCHAR *name;
139 xml_encoding enc;
140 UINT cp;
141 };
142
/* Table of supported encodings.
   - parse_encoding_name() binary-searches it by name, so entries have to stay
     sorted by name (case-insensitive order).
   - get_encoding_name() and get_code_page() index it directly with an
     xml_encoding value, so the entry order has to match those values as well.
   The ~0 code page of the UTF-16 entry is what reader_more() tests for to take
   its plain-copy path. */
143 static const struct xml_encoding_data xml_encoding_map[] = {
144 { utf16W, XmlEncoding_UTF16, ~0 },
145 { utf8W, XmlEncoding_UTF8, CP_UTF8 }
146 };
147
148 const WCHAR *get_encoding_name(xml_encoding encoding)
149 {
150 return xml_encoding_map[encoding].name;
151 }
152
153 xml_encoding get_encoding_from_codepage(UINT codepage)
154 {
155 int i;
156 for (i = 0; i < sizeof(xml_encoding_map)/sizeof(xml_encoding_map[0]); i++)
157 {
158 if (xml_encoding_map[i].cp == codepage) return xml_encoding_map[i].enc;
159 }
160 return XmlEncoding_Unknown;
161 }
162
/* Growable data buffer; used both for raw stream data and for its UTF-16
   conversion (see struct input_buffer). */
163 typedef struct
164 {
165 char *data; /* buffer memory, allocated with readerinput_alloc() */
166 UINT cur; /* current position: a WCHAR index for the UTF-16 buffer
                 (see reader_get_ptr()), a byte offset for the raw buffer */
167 unsigned int allocated; /* allocated size in bytes */
168 unsigned int written; /* number of bytes of valid data */
169 } encoded_buffer;
170
/* forward declaration, the structure itself is defined after xmlreader */
171 typedef struct input_buffer input_buffer;
172
/* Reader input object: wraps the user supplied input together with the buffers
   used to read and convert it. */
173 typedef struct
174 {
175 IXmlReaderInput IXmlReaderInput_iface;
176 LONG ref;
177 /* reference passed on IXmlReaderInput creation, is kept when input is created */
178 IUnknown *input;
179 IMalloc *imalloc; /* allocator used by the readerinput_alloc/realloc/free helpers */
180 xml_encoding encoding;
181 BOOL hint;
182 WCHAR *baseuri;
183 /* stream reference set after a SetInput() call from the reader; it is stored
   as a sequential stream because optimizations possible with IStream
   aren't implemented currently */
186 ISequentialStream *stream;
187 input_buffer *buffer; /* raw and UTF-16 buffers, see alloc_input_buffer() */
188 unsigned int pending : 1; /* set by readerinput_growraw() when the last read returned E_PENDING */
189 } xmlreaderinput;
190
/* vtable forward declaration; the definition is not in this part of the file */
191 static const struct IUnknownVtbl xmlreaderinputvtbl;
192
193 /* Structure to hold a parsed string of specific length.

   The reader stores a node value as a 'start' offset into the UTF-16 buffer;
   on request a null-terminated copy of it is allocated.

   To initialize a strval variable use reader_init_strvalue() (or
   reader_init_cstrvalue() for a constant string), to set a strval as a reader
   value use reader_set_strvalue(). reader_get_strptr() returns 'str' when it
   is set and the buffer position of 'start' otherwise.
*/
201 typedef struct
202 {
203 WCHAR *str; /* allocated null-terminated string */
204 UINT len; /* length in WCHARs, altered after ReadValueChunk */
205 UINT start; /* input position where value starts */
206 } strval;
207
/* Shared constant strings and strvals built on them.
   strval_empty.str is used as a sentinel: reader_strvaldup() does not copy and
   reader_free_strvalued() does not free a strval whose 'str' points to it. */
208 static WCHAR emptyW[] = {0};
209 static WCHAR xmlW[] = {'x','m','l',0};
210 static WCHAR xmlnsW[] = {'x','m','l','n','s',0};
211 static const strval strval_empty = { emptyW };
212 static const strval strval_xml = { xmlW, 3 };
213 static const strval strval_xmlns = { xmlnsW, 5 };
214
/* Attribute of the current node, linked into xmlreader.attrs.
   Created by reader_add_attr(), which copies the strvals by value and
   zeroes 'prefix' when no prefix is given. */
215 struct attribute
216 {
217 struct list entry;
218 strval prefix;
219 strval localname;
220 strval value;
221 };
222
/* Open element, linked into xmlreader.elements with the most recently pushed
   element at the head. The strings are duplicated by reader_push_element()
   and released by reader_free_element(). */
223 struct element
224 {
225 struct list entry;
226 strval prefix;
227 strval localname;
228 strval qname;
229 };
230
/* Namespace definition, linked into xmlreader.ns or xmlreader.nsdef
   (see reader_push_ns()). */
231 struct ns
232 {
233 struct list entry;
234 strval prefix; /* zeroed for entries of the nsdef list */
235 strval uri;
236 struct element *element; /* NULL until reader_mark_ns_nodes() assigns the owning element */
237 };
238
/* Reader object state. */
239 typedef struct
240 {
241 IXmlReader IXmlReader_iface;
242 LONG ref;
243 xmlreaderinput *input;
244 IMalloc *imalloc; /* allocator used by reader_alloc()/reader_free() */
245 XmlReadState state;
246 XmlReaderInternalState instate;
247 XmlReaderResumeState resumestate;
248 XmlNodeType nodetype;
249 DtdProcessing dtdmode;
250 IXmlResolver *resolver;
251 IUnknown *mlang;
252 UINT line, pos; /* reader position in XML stream */
253 struct list attrs; /* attributes list for current node */
254 struct attribute *attr; /* current attribute */
255 UINT attr_count;
256 struct list nsdef; /* struct ns entries added by reader_push_ns() with def == TRUE */
257 struct list ns; /* struct ns entries added by reader_push_ns() with def == FALSE */
258 struct list elements; /* stack of struct element, most recent at the head */
259 strval strvalues[StringValue_Last]; /* indexed by XmlReaderStringValue */
260 UINT depth; /* updated by reader_inc_depth()/reader_dec_depth() */
261 UINT max_depth; /* limit checked by reader_inc_depth() */
262 BOOL is_empty_element;
263 struct element empty_element;
264 UINT resume[XmlReadResume_Last]; /* offsets used to resume reader */
265 } xmlreader;
266
/* Pair of buffers owned by a reader input: raw stream data and its UTF-16
   conversion. Allocated by alloc_input_buffer(). */
267 struct input_buffer
268 {
269 encoded_buffer utf16; /* converted data the parser works on */
270 encoded_buffer encoded; /* raw data as read from the stream */
271 UINT code_page; /* ~0 until readerinput_switchencoding() sets it */
272 xmlreaderinput *input; /* owning input, provides the allocator */
273 };
274
275 static inline xmlreader *impl_from_IXmlReader(IXmlReader *iface)
276 {
277 return CONTAINING_RECORD(iface, xmlreader, IXmlReader_iface);
278 }
279
280 static inline xmlreaderinput *impl_from_IXmlReaderInput(IXmlReaderInput *iface)
281 {
282 return CONTAINING_RECORD(iface, xmlreaderinput, IXmlReaderInput_iface);
283 }
284
285 /* reader memory allocation functions */
286 static inline void *reader_alloc(xmlreader *reader, size_t len)
287 {
288 return m_alloc(reader->imalloc, len);
289 }
290
291 static inline void *reader_alloc_zero(xmlreader *reader, size_t len)
292 {
293 void *ret = reader_alloc(reader, len);
294 if (ret)
295 memset(ret, 0, len);
296 return ret;
297 }
298
299 static inline void reader_free(xmlreader *reader, void *mem)
300 {
301 m_free(reader->imalloc, mem);
302 }
303
304 /* Just return pointer from offset, no attempt to read more. */
305 static inline WCHAR *reader_get_ptr2(const xmlreader *reader, UINT offset)
306 {
307 encoded_buffer *buffer = &reader->input->buffer->utf16;
308 return (WCHAR*)buffer->data + offset;
309 }
310
311 static inline WCHAR *reader_get_strptr(const xmlreader *reader, const strval *v)
312 {
313 return v->str ? v->str : reader_get_ptr2(reader, v->start);
314 }
315
316 static HRESULT reader_strvaldup(xmlreader *reader, const strval *src, strval *dest)
317 {
318 *dest = *src;
319
320 if (src->str != strval_empty.str)
321 {
322 dest->str = reader_alloc(reader, (dest->len+1)*sizeof(WCHAR));
323 if (!dest->str) return E_OUTOFMEMORY;
324 memcpy(dest->str, reader_get_strptr(reader, src), dest->len*sizeof(WCHAR));
325 dest->str[dest->len] = 0;
326 dest->start = 0;
327 }
328
329 return S_OK;
330 }
331
332 /* reader input memory allocation functions */
333 static inline void *readerinput_alloc(xmlreaderinput *input, size_t len)
334 {
335 return m_alloc(input->imalloc, len);
336 }
337
338 static inline void *readerinput_realloc(xmlreaderinput *input, void *mem, size_t len)
339 {
340 return m_realloc(input->imalloc, mem, len);
341 }
342
343 static inline void readerinput_free(xmlreaderinput *input, void *mem)
344 {
345 m_free(input->imalloc, mem);
346 }
347
348 static inline WCHAR *readerinput_strdupW(xmlreaderinput *input, const WCHAR *str)
349 {
350 LPWSTR ret = NULL;
351
352 if(str) {
353 DWORD size;
354
355 size = (strlenW(str)+1)*sizeof(WCHAR);
356 ret = readerinput_alloc(input, size);
357 if (ret) memcpy(ret, str, size);
358 }
359
360 return ret;
361 }
362
363 static void reader_clear_attrs(xmlreader *reader)
364 {
365 struct attribute *attr, *attr2;
366 LIST_FOR_EACH_ENTRY_SAFE(attr, attr2, &reader->attrs, struct attribute, entry)
367 {
368 reader_free(reader, attr);
369 }
370 list_init(&reader->attrs);
371 reader->attr_count = 0;
372 reader->attr = NULL;
373 }
374
375 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
376 while we are on a node with attributes */
377 static HRESULT reader_add_attr(xmlreader *reader, strval *prefix, strval *localname, strval *value)
378 {
379 struct attribute *attr;
380
381 attr = reader_alloc(reader, sizeof(*attr));
382 if (!attr) return E_OUTOFMEMORY;
383
384 if (prefix)
385 attr->prefix = *prefix;
386 else
387 memset(&attr->prefix, 0, sizeof(attr->prefix));
388 attr->localname = *localname;
389 attr->value = *value;
390 list_add_tail(&reader->attrs, &attr->entry);
391 reader->attr_count++;
392
393 return S_OK;
394 }
395
396 /* This one frees stored string value if needed */
397 static void reader_free_strvalued(xmlreader *reader, strval *v)
398 {
399 if (v->str != strval_empty.str)
400 {
401 reader_free(reader, v->str);
402 *v = strval_empty;
403 }
404 }
405
406 static inline void reader_init_strvalue(UINT start, UINT len, strval *v)
407 {
408 v->start = start;
409 v->len = len;
410 v->str = NULL;
411 }
412
413 static inline const char* debug_strval(const xmlreader *reader, const strval *v)
414 {
415 return debugstr_wn(reader_get_strptr(reader, v), v->len);
416 }
417
418 /* used to initialize from constant string */
419 static inline void reader_init_cstrvalue(WCHAR *str, UINT len, strval *v)
420 {
421 v->start = 0;
422 v->len = len;
423 v->str = str;
424 }
425
426 static void reader_free_strvalue(xmlreader *reader, XmlReaderStringValue type)
427 {
428 reader_free_strvalued(reader, &reader->strvalues[type]);
429 }
430
431 static void reader_free_strvalues(xmlreader *reader)
432 {
433 int type;
434 for (type = 0; type < StringValue_Last; type++)
435 reader_free_strvalue(reader, type);
436 }
437
438 /* This helper should only be used to test if strings are the same,
439 it doesn't try to sort. */
440 static inline int strval_eq(const xmlreader *reader, const strval *str1, const strval *str2)
441 {
442 if (str1->len != str2->len) return 0;
443 return !memcmp(reader_get_strptr(reader, str1), reader_get_strptr(reader, str2), str1->len*sizeof(WCHAR));
444 }
445
446 static void reader_clear_elements(xmlreader *reader)
447 {
448 struct element *elem, *elem2;
449 LIST_FOR_EACH_ENTRY_SAFE(elem, elem2, &reader->elements, struct element, entry)
450 {
451 reader_free_strvalued(reader, &elem->prefix);
452 reader_free_strvalued(reader, &elem->localname);
453 reader_free_strvalued(reader, &elem->qname);
454 reader_free(reader, elem);
455 }
456 list_init(&reader->elements);
457 reader->is_empty_element = FALSE;
458 }
459
460 static HRESULT reader_inc_depth(xmlreader *reader)
461 {
462 if (++reader->depth > reader->max_depth) return SC_E_MAXELEMENTDEPTH;
463 return S_OK;
464 }
465
466 static void reader_dec_depth(xmlreader *reader)
467 {
468 if (reader->depth > 1) reader->depth--;
469 }
470
471 static HRESULT reader_push_ns(xmlreader *reader, const strval *prefix, const strval *uri, BOOL def)
472 {
473 struct ns *ns;
474 HRESULT hr;
475
476 ns = reader_alloc(reader, sizeof(*ns));
477 if (!ns) return E_OUTOFMEMORY;
478
479 if (def)
480 memset(&ns->prefix, 0, sizeof(ns->prefix));
481 else {
482 hr = reader_strvaldup(reader, prefix, &ns->prefix);
483 if (FAILED(hr)) {
484 reader_free(reader, ns);
485 return hr;
486 }
487 }
488
489 hr = reader_strvaldup(reader, uri, &ns->uri);
490 if (FAILED(hr)) {
491 reader_free_strvalued(reader, &ns->prefix);
492 reader_free(reader, ns);
493 return hr;
494 }
495
496 ns->element = NULL;
497 list_add_head(def ? &reader->nsdef : &reader->ns, &ns->entry);
498 return hr;
499 }
500
501 static void reader_free_element(xmlreader *reader, struct element *element)
502 {
503 reader_free_strvalued(reader, &element->prefix);
504 reader_free_strvalued(reader, &element->localname);
505 reader_free_strvalued(reader, &element->qname);
506 reader_free(reader, element);
507 }
508
509 static void reader_mark_ns_nodes(xmlreader *reader, struct element *element)
510 {
511 struct ns *ns;
512
513 LIST_FOR_EACH_ENTRY(ns, &reader->ns, struct ns, entry) {
514 if (ns->element)
515 break;
516 ns->element = element;
517 }
518
519 LIST_FOR_EACH_ENTRY(ns, &reader->nsdef, struct ns, entry) {
520 if (ns->element)
521 break;
522 ns->element = element;
523 }
524 }
525
526 static HRESULT reader_push_element(xmlreader *reader, strval *prefix, strval *localname,
527 strval *qname)
528 {
529 struct element *element;
530 HRESULT hr;
531
532 if (!list_empty(&reader->elements))
533 {
534 hr = reader_inc_depth(reader);
535 if (FAILED(hr))
536 return hr;
537 }
538
539 element = reader_alloc_zero(reader, sizeof(*element));
540 if (!element) {
541 hr = E_OUTOFMEMORY;
542 goto failed;
543 }
544
545 if ((hr = reader_strvaldup(reader, prefix, &element->prefix)) != S_OK ||
546 (hr = reader_strvaldup(reader, localname, &element->localname)) != S_OK ||
547 (hr = reader_strvaldup(reader, qname, &element->qname)) != S_OK)
548 {
549 reader_free_element(reader, element);
550 goto failed;
551 }
552
553 list_add_head(&reader->elements, &element->entry);
554 reader_mark_ns_nodes(reader, element);
555 reader->is_empty_element = FALSE;
556
557 failed:
558 reader_dec_depth(reader);
559 return hr;
560 }
561
562 static void reader_pop_ns_nodes(xmlreader *reader, struct element *element)
563 {
564 struct ns *ns, *ns2;
565
566 LIST_FOR_EACH_ENTRY_SAFE_REV(ns, ns2, &reader->ns, struct ns, entry) {
567 if (ns->element != element)
568 break;
569
570 list_remove(&ns->entry);
571 reader_free_strvalued(reader, &ns->prefix);
572 reader_free_strvalued(reader, &ns->uri);
573 reader_free(reader, ns);
574 }
575
576 if (!list_empty(&reader->nsdef)) {
577 ns = LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
578 if (ns->element == element) {
579 list_remove(&ns->entry);
580 reader_free_strvalued(reader, &ns->prefix);
581 reader_free_strvalued(reader, &ns->uri);
582 reader_free(reader, ns);
583 }
584 }
585 }
586
587 static void reader_pop_element(xmlreader *reader)
588 {
589 struct element *element;
590
591 if (list_empty(&reader->elements))
592 return;
593
594 element = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
595 list_remove(&element->entry);
596
597 reader_pop_ns_nodes(reader, element);
598 reader_free_element(reader, element);
599 reader_dec_depth(reader);
600
601 /* It was a root element, the rest is expected as Misc */
602 if (list_empty(&reader->elements))
603 reader->instate = XmlReadInState_MiscEnd;
604 }
605
606 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
607 means node value is to be determined. */
608 static void reader_set_strvalue(xmlreader *reader, XmlReaderStringValue type, const strval *value)
609 {
610 strval *v = &reader->strvalues[type];
611
612 reader_free_strvalue(reader, type);
613 if (!value)
614 {
615 v->str = NULL;
616 v->start = 0;
617 v->len = 0;
618 return;
619 }
620
621 if (value->str == strval_empty.str)
622 *v = *value;
623 else
624 {
625 if (type == StringValue_Value)
626 {
627 /* defer allocation for value string */
628 v->str = NULL;
629 v->start = value->start;
630 v->len = value->len;
631 }
632 else
633 {
634 v->str = reader_alloc(reader, (value->len + 1)*sizeof(WCHAR));
635 memcpy(v->str, reader_get_strptr(reader, value), value->len*sizeof(WCHAR));
636 v->str[value->len] = 0;
637 v->len = value->len;
638 }
639 }
640 }
641
642 static inline int is_reader_pending(xmlreader *reader)
643 {
644 return reader->input->pending;
645 }
646
647 static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
648 {
649 const int initial_len = 0x2000;
650 buffer->data = readerinput_alloc(input, initial_len);
651 if (!buffer->data) return E_OUTOFMEMORY;
652
653 memset(buffer->data, 0, 4);
654 buffer->cur = 0;
655 buffer->allocated = initial_len;
656 buffer->written = 0;
657
658 return S_OK;
659 }
660
661 static void free_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer)
662 {
663 readerinput_free(input, buffer->data);
664 }
665
666 HRESULT get_code_page(xml_encoding encoding, UINT *cp)
667 {
668 if (encoding == XmlEncoding_Unknown)
669 {
670 FIXME("unsupported encoding %d\n", encoding);
671 return E_NOTIMPL;
672 }
673
674 *cp = xml_encoding_map[encoding].cp;
675
676 return S_OK;
677 }
678
679 xml_encoding parse_encoding_name(const WCHAR *name, int len)
680 {
681 int min, max, n, c;
682
683 if (!name) return XmlEncoding_Unknown;
684
685 min = 0;
686 max = sizeof(xml_encoding_map)/sizeof(struct xml_encoding_data) - 1;
687
688 while (min <= max)
689 {
690 n = (min+max)/2;
691
692 if (len != -1)
693 c = strncmpiW(xml_encoding_map[n].name, name, len);
694 else
695 c = strcmpiW(xml_encoding_map[n].name, name);
696 if (!c)
697 return xml_encoding_map[n].enc;
698
699 if (c > 0)
700 max = n-1;
701 else
702 min = n+1;
703 }
704
705 return XmlEncoding_Unknown;
706 }
707
708 static HRESULT alloc_input_buffer(xmlreaderinput *input)
709 {
710 input_buffer *buffer;
711 HRESULT hr;
712
713 input->buffer = NULL;
714
715 buffer = readerinput_alloc(input, sizeof(*buffer));
716 if (!buffer) return E_OUTOFMEMORY;
717
718 buffer->input = input;
719 buffer->code_page = ~0; /* code page is unknown at this point */
720 hr = init_encoded_buffer(input, &buffer->utf16);
721 if (hr != S_OK) {
722 readerinput_free(input, buffer);
723 return hr;
724 }
725
726 hr = init_encoded_buffer(input, &buffer->encoded);
727 if (hr != S_OK) {
728 free_encoded_buffer(input, &buffer->utf16);
729 readerinput_free(input, buffer);
730 return hr;
731 }
732
733 input->buffer = buffer;
734 return S_OK;
735 }
736
737 static void free_input_buffer(input_buffer *buffer)
738 {
739 free_encoded_buffer(buffer->input, &buffer->encoded);
740 free_encoded_buffer(buffer->input, &buffer->utf16);
741 readerinput_free(buffer->input, buffer);
742 }
743
744 static void readerinput_release_stream(xmlreaderinput *readerinput)
745 {
746 if (readerinput->stream) {
747 ISequentialStream_Release(readerinput->stream);
748 readerinput->stream = NULL;
749 }
750 }
751
752 /* Queries already stored interface for IStream/ISequentialStream.
753 Interface supplied on creation will be overwritten */
754 static inline HRESULT readerinput_query_for_stream(xmlreaderinput *readerinput)
755 {
756 HRESULT hr;
757
758 readerinput_release_stream(readerinput);
759 hr = IUnknown_QueryInterface(readerinput->input, &IID_IStream, (void**)&readerinput->stream);
760 if (hr != S_OK)
761 hr = IUnknown_QueryInterface(readerinput->input, &IID_ISequentialStream, (void**)&readerinput->stream);
762
763 return hr;
764 }
765
766 /* reads a chunk to raw buffer */
767 static HRESULT readerinput_growraw(xmlreaderinput *readerinput)
768 {
769 encoded_buffer *buffer = &readerinput->buffer->encoded;
770 /* to make sure aligned length won't exceed allocated length */
771 ULONG len = buffer->allocated - buffer->written - 4;
772 ULONG read;
773 HRESULT hr;
774
775 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
776 variable width encodings like UTF-8 */
777 len = (len + 3) & ~3;
778 /* try to use allocated space or grow */
779 if (buffer->allocated - buffer->written < len)
780 {
781 buffer->allocated *= 2;
782 buffer->data = readerinput_realloc(readerinput, buffer->data, buffer->allocated);
783 len = buffer->allocated - buffer->written;
784 }
785
786 read = 0;
787 hr = ISequentialStream_Read(readerinput->stream, buffer->data + buffer->written, len, &read);
788 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer->written, buffer->allocated, len, read, hr);
789 readerinput->pending = hr == E_PENDING;
790 if (FAILED(hr)) return hr;
791 buffer->written += read;
792
793 return hr;
794 }
795
796 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
797 static void readerinput_grow(xmlreaderinput *readerinput, int length)
798 {
799 encoded_buffer *buffer = &readerinput->buffer->utf16;
800
801 length *= sizeof(WCHAR);
802 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
803 if (buffer->allocated < buffer->written + length + 4)
804 {
805 int grown_size = max(2*buffer->allocated, buffer->allocated + length);
806 buffer->data = readerinput_realloc(readerinput, buffer->data, grown_size);
807 buffer->allocated = grown_size;
808 }
809 }
810
811 static inline BOOL readerinput_is_utf8(xmlreaderinput *readerinput)
812 {
813 static const char startA[] = {'<','?'};
814 static const char commentA[] = {'<','!'};
815 encoded_buffer *buffer = &readerinput->buffer->encoded;
816 unsigned char *ptr = (unsigned char*)buffer->data;
817
818 return !memcmp(buffer->data, startA, sizeof(startA)) ||
819 !memcmp(buffer->data, commentA, sizeof(commentA)) ||
820 /* test start byte */
821 (ptr[0] == '<' &&
822 (
823 (ptr[1] && (ptr[1] <= 0x7f)) ||
824 (buffer->data[1] >> 5) == 0x6 || /* 2 bytes */
825 (buffer->data[1] >> 4) == 0xe || /* 3 bytes */
826 (buffer->data[1] >> 3) == 0x1e) /* 4 bytes */
827 );
828 }
829
830 static HRESULT readerinput_detectencoding(xmlreaderinput *readerinput, xml_encoding *enc)
831 {
832 encoded_buffer *buffer = &readerinput->buffer->encoded;
833 static const WCHAR startW[] = {'<','?'};
834 static const WCHAR commentW[] = {'<','!'};
835 static const char utf8bom[] = {0xef,0xbb,0xbf};
836 static const char utf16lebom[] = {0xff,0xfe};
837
838 *enc = XmlEncoding_Unknown;
839
840 if (buffer->written <= 3)
841 {
842 HRESULT hr = readerinput_growraw(readerinput);
843 if (FAILED(hr)) return hr;
844 if (buffer->written <= 3) return MX_E_INPUTEND;
845 }
846
847 /* try start symbols if we have enough data to do that, input buffer should contain
848 first chunk already */
849 if (readerinput_is_utf8(readerinput))
850 *enc = XmlEncoding_UTF8;
851 else if (!memcmp(buffer->data, startW, sizeof(startW)) ||
852 !memcmp(buffer->data, commentW, sizeof(commentW)))
853 *enc = XmlEncoding_UTF16;
854 /* try with BOM now */
855 else if (!memcmp(buffer->data, utf8bom, sizeof(utf8bom)))
856 {
857 buffer->cur += sizeof(utf8bom);
858 *enc = XmlEncoding_UTF8;
859 }
860 else if (!memcmp(buffer->data, utf16lebom, sizeof(utf16lebom)))
861 {
862 buffer->cur += sizeof(utf16lebom);
863 *enc = XmlEncoding_UTF16;
864 }
865
866 return S_OK;
867 }
868
869 static int readerinput_get_utf8_convlen(xmlreaderinput *readerinput)
870 {
871 encoded_buffer *buffer = &readerinput->buffer->encoded;
872 int len = buffer->written;
873
874 /* complete single byte char */
875 if (!(buffer->data[len-1] & 0x80)) return len;
876
877 /* find start byte of multibyte char */
878 while (--len && !(buffer->data[len] & 0xc0))
879 ;
880
881 return len;
882 }
883
884 /* Returns byte length of complete char sequence for buffer code page,
885 it's relative to current buffer position which is currently used for BOM handling
886 only. */
887 static int readerinput_get_convlen(xmlreaderinput *readerinput)
888 {
889 encoded_buffer *buffer = &readerinput->buffer->encoded;
890 int len;
891
892 if (readerinput->buffer->code_page == CP_UTF8)
893 len = readerinput_get_utf8_convlen(readerinput);
894 else
895 len = buffer->written;
896
897 TRACE("%d\n", len - buffer->cur);
898 return len - buffer->cur;
899 }
900
901 /* It's possible that raw buffer has some leftovers from last conversion - some char
902 sequence that doesn't represent a full code point. Length argument should be calculated with
903 readerinput_get_convlen(), if it's -1 it will be calculated here. */
904 static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
905 {
906 encoded_buffer *buffer = &readerinput->buffer->encoded;
907
908 if (len == -1)
909 len = readerinput_get_convlen(readerinput);
910
911 memmove(buffer->data, buffer->data + buffer->cur + (buffer->written - len), len);
912 /* everything below cur is lost too */
913 buffer->written -= len + buffer->cur;
914 /* after this point we don't need cur offset really,
915 it's used only to mark where actual data begins when first chunk is read */
916 buffer->cur = 0;
917 }
918
919 /* note that raw buffer content is kept */
920 static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
921 {
922 encoded_buffer *src = &readerinput->buffer->encoded;
923 encoded_buffer *dest = &readerinput->buffer->utf16;
924 int len, dest_len;
925 HRESULT hr;
926 WCHAR *ptr;
927 UINT cp;
928
929 hr = get_code_page(enc, &cp);
930 if (FAILED(hr)) return;
931
932 readerinput->buffer->code_page = cp;
933 len = readerinput_get_convlen(readerinput);
934
935 TRACE("switching to cp %d\n", cp);
936
937 /* just copy in this case */
938 if (enc == XmlEncoding_UTF16)
939 {
940 readerinput_grow(readerinput, len);
941 memcpy(dest->data, src->data + src->cur, len);
942 dest->written += len*sizeof(WCHAR);
943 return;
944 }
945
946 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
947 readerinput_grow(readerinput, dest_len);
948 ptr = (WCHAR*)dest->data;
949 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
950 ptr[dest_len] = 0;
951 dest->written += dest_len*sizeof(WCHAR);
952 }
953
954 /* shrinks parsed data a buffer begins with */
955 static void reader_shrink(xmlreader *reader)
956 {
957 encoded_buffer *buffer = &reader->input->buffer->utf16;
958
959 /* avoid to move too often using threshold shrink length */
960 if (buffer->cur*sizeof(WCHAR) > buffer->written / 2)
961 {
962 buffer->written -= buffer->cur*sizeof(WCHAR);
963 memmove(buffer->data, (WCHAR*)buffer->data + buffer->cur, buffer->written);
964 buffer->cur = 0;
965 *(WCHAR*)&buffer->data[buffer->written] = 0;
966 }
967 }
968
969 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
970 It won't attempt to shrink but will grow destination buffer if needed */
971 static HRESULT reader_more(xmlreader *reader)
972 {
973 xmlreaderinput *readerinput = reader->input;
974 encoded_buffer *src = &readerinput->buffer->encoded;
975 encoded_buffer *dest = &readerinput->buffer->utf16;
976 UINT cp = readerinput->buffer->code_page;
977 int len, dest_len;
978 HRESULT hr;
979 WCHAR *ptr;
980
981 /* get some raw data from stream first */
982 hr = readerinput_growraw(readerinput);
983 len = readerinput_get_convlen(readerinput);
984
985 /* just copy for UTF-16 case */
986 if (cp == ~0)
987 {
988 readerinput_grow(readerinput, len);
989 memcpy(dest->data + dest->written, src->data + src->cur, len);
990 dest->written += len*sizeof(WCHAR);
991 return hr;
992 }
993
994 dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
995 readerinput_grow(readerinput, dest_len);
996 ptr = (WCHAR*)(dest->data + dest->written);
997 MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
998 ptr[dest_len] = 0;
999 dest->written += dest_len*sizeof(WCHAR);
1000 /* get rid of processed data */
1001 readerinput_shrinkraw(readerinput, len);
1002
1003 return hr;
1004 }
1005
1006 static inline UINT reader_get_cur(xmlreader *reader)
1007 {
1008 return reader->input->buffer->utf16.cur;
1009 }
1010
1011 static inline WCHAR *reader_get_ptr(xmlreader *reader)
1012 {
1013 encoded_buffer *buffer = &reader->input->buffer->utf16;
1014 WCHAR *ptr = (WCHAR*)buffer->data + buffer->cur;
1015 if (!*ptr) reader_more(reader);
1016 return (WCHAR*)buffer->data + buffer->cur;
1017 }
1018
1019 static int reader_cmp(xmlreader *reader, const WCHAR *str)
1020 {
1021 int i=0;
1022 const WCHAR *ptr = reader_get_ptr(reader);
1023 while (str[i])
1024 {
1025 if (!ptr[i])
1026 {
1027 reader_more(reader);
1028 ptr = reader_get_ptr(reader);
1029 }
1030 if (str[i] != ptr[i])
1031 return ptr[i] - str[i];
1032 i++;
1033 }
1034 return 0;
1035 }
1036
1037 /* moves cursor n WCHARs forward */
1038 static void reader_skipn(xmlreader *reader, int n)
1039 {
1040 encoded_buffer *buffer = &reader->input->buffer->utf16;
1041 const WCHAR *ptr = reader_get_ptr(reader);
1042
1043 while (*ptr++ && n--)
1044 {
1045 buffer->cur++;
1046 reader->pos++;
1047 }
1048 }
1049
1050 static inline BOOL is_wchar_space(WCHAR ch)
1051 {
1052 return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
1053 }
1054
1055 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1056 static int reader_skipspaces(xmlreader *reader)
1057 {
1058 encoded_buffer *buffer = &reader->input->buffer->utf16;
1059 const WCHAR *ptr = reader_get_ptr(reader);
1060 UINT start = reader_get_cur(reader);
1061
1062 while (is_wchar_space(*ptr))
1063 {
1064 if (*ptr == '\r')
1065 reader->pos = 0;
1066 else if (*ptr == '\n')
1067 {
1068 reader->line++;
1069 reader->pos = 0;
1070 }
1071 else
1072 reader->pos++;
1073
1074 buffer->cur++;
1075 ptr = reader_get_ptr(reader);
1076 }
1077
1078 return reader_get_cur(reader) - start;
1079 }
1080
1081 /* [26] VersionNum ::= '1.' [0-9]+ */
1082 static HRESULT reader_parse_versionnum(xmlreader *reader, strval *val)
1083 {
1084 static const WCHAR onedotW[] = {'1','.',0};
1085 WCHAR *ptr, *ptr2;
1086 UINT start;
1087
1088 if (reader_cmp(reader, onedotW)) return WC_E_XMLDECL;
1089
1090 start = reader_get_cur(reader);
1091 /* skip "1." */
1092 reader_skipn(reader, 2);
1093
1094 ptr2 = ptr = reader_get_ptr(reader);
1095 while (*ptr >= '0' && *ptr <= '9')
1096 {
1097 reader_skipn(reader, 1);
1098 ptr = reader_get_ptr(reader);
1099 }
1100
1101 if (ptr2 == ptr) return WC_E_DIGIT;
1102 reader_init_strvalue(start, reader_get_cur(reader)-start, val);
1103 TRACE("version=%s\n", debug_strval(reader, val));
1104 return S_OK;
1105 }
1106
1107 /* [25] Eq ::= S? '=' S? */
1108 static HRESULT reader_parse_eq(xmlreader *reader)
1109 {
1110 static const WCHAR eqW[] = {'=',0};
1111 reader_skipspaces(reader);
1112 if (reader_cmp(reader, eqW)) return WC_E_EQUAL;
1113 /* skip '=' */
1114 reader_skipn(reader, 1);
1115 reader_skipspaces(reader);
1116 return S_OK;
1117 }
1118
1119 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1120 static HRESULT reader_parse_versioninfo(xmlreader *reader)
1121 {
1122 static const WCHAR versionW[] = {'v','e','r','s','i','o','n',0};
1123 strval val, name;
1124 HRESULT hr;
1125
1126 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1127
1128 if (reader_cmp(reader, versionW)) return WC_E_XMLDECL;
1129 reader_init_strvalue(reader_get_cur(reader), 7, &name);
1130 /* skip 'version' */
1131 reader_skipn(reader, 7);
1132
1133 hr = reader_parse_eq(reader);
1134 if (FAILED(hr)) return hr;
1135
1136 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1137 return WC_E_QUOTE;
1138 /* skip "'"|'"' */
1139 reader_skipn(reader, 1);
1140
1141 hr = reader_parse_versionnum(reader, &val);
1142 if (FAILED(hr)) return hr;
1143
1144 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1145 return WC_E_QUOTE;
1146
1147 /* skip "'"|'"' */
1148 reader_skipn(reader, 1);
1149
1150 return reader_add_attr(reader, NULL, &name, &val);
1151 }
1152
1153 /* ([A-Za-z0-9._] | '-') */
1154 static inline BOOL is_wchar_encname(WCHAR ch)
1155 {
1156 return ((ch >= 'A' && ch <= 'Z') ||
1157 (ch >= 'a' && ch <= 'z') ||
1158 (ch >= '0' && ch <= '9') ||
1159 (ch == '.') || (ch == '_') ||
1160 (ch == '-'));
1161 }
1162
1163 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1164 static HRESULT reader_parse_encname(xmlreader *reader, strval *val)
1165 {
1166 WCHAR *start = reader_get_ptr(reader), *ptr;
1167 xml_encoding enc;
1168 int len;
1169
1170 if ((*start < 'A' || *start > 'Z') && (*start < 'a' || *start > 'z'))
1171 return WC_E_ENCNAME;
1172
1173 val->start = reader_get_cur(reader);
1174
1175 ptr = start;
1176 while (is_wchar_encname(*++ptr))
1177 ;
1178
1179 len = ptr - start;
1180 enc = parse_encoding_name(start, len);
1181 TRACE("encoding name %s\n", debugstr_wn(start, len));
1182 val->str = start;
1183 val->len = len;
1184
1185 if (enc == XmlEncoding_Unknown)
1186 return WC_E_ENCNAME;
1187
1188 /* skip encoding name */
1189 reader_skipn(reader, len);
1190 return S_OK;
1191 }
1192
1193 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1194 static HRESULT reader_parse_encdecl(xmlreader *reader)
1195 {
1196 static const WCHAR encodingW[] = {'e','n','c','o','d','i','n','g',0};
1197 strval name, val;
1198 HRESULT hr;
1199
1200 if (!reader_skipspaces(reader)) return S_FALSE;
1201
1202 if (reader_cmp(reader, encodingW)) return S_FALSE;
1203 name.str = reader_get_ptr(reader);
1204 name.start = reader_get_cur(reader);
1205 name.len = 8;
1206 /* skip 'encoding' */
1207 reader_skipn(reader, 8);
1208
1209 hr = reader_parse_eq(reader);
1210 if (FAILED(hr)) return hr;
1211
1212 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1213 return WC_E_QUOTE;
1214 /* skip "'"|'"' */
1215 reader_skipn(reader, 1);
1216
1217 hr = reader_parse_encname(reader, &val);
1218 if (FAILED(hr)) return hr;
1219
1220 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1221 return WC_E_QUOTE;
1222
1223 /* skip "'"|'"' */
1224 reader_skipn(reader, 1);
1225
1226 return reader_add_attr(reader, NULL, &name, &val);
1227 }
1228
1229 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1230 static HRESULT reader_parse_sddecl(xmlreader *reader)
1231 {
1232 static const WCHAR standaloneW[] = {'s','t','a','n','d','a','l','o','n','e',0};
1233 static const WCHAR yesW[] = {'y','e','s',0};
1234 static const WCHAR noW[] = {'n','o',0};
1235 strval name, val;
1236 UINT start;
1237 HRESULT hr;
1238
1239 if (!reader_skipspaces(reader)) return S_FALSE;
1240
1241 if (reader_cmp(reader, standaloneW)) return S_FALSE;
1242 reader_init_strvalue(reader_get_cur(reader), 10, &name);
1243 /* skip 'standalone' */
1244 reader_skipn(reader, 10);
1245
1246 hr = reader_parse_eq(reader);
1247 if (FAILED(hr)) return hr;
1248
1249 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1250 return WC_E_QUOTE;
1251 /* skip "'"|'"' */
1252 reader_skipn(reader, 1);
1253
1254 if (reader_cmp(reader, yesW) && reader_cmp(reader, noW))
1255 return WC_E_XMLDECL;
1256
1257 start = reader_get_cur(reader);
1258 /* skip 'yes'|'no' */
1259 reader_skipn(reader, reader_cmp(reader, yesW) ? 2 : 3);
1260 reader_init_strvalue(start, reader_get_cur(reader)-start, &val);
1261 TRACE("standalone=%s\n", debug_strval(reader, &val));
1262
1263 if (reader_cmp(reader, quoteW) && reader_cmp(reader, dblquoteW))
1264 return WC_E_QUOTE;
1265 /* skip "'"|'"' */
1266 reader_skipn(reader, 1);
1267
1268 return reader_add_attr(reader, NULL, &name, &val);
1269 }
1270
1271 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1272 static HRESULT reader_parse_xmldecl(xmlreader *reader)
1273 {
1274 static const WCHAR xmldeclW[] = {'<','?','x','m','l',' ',0};
1275 static const WCHAR declcloseW[] = {'?','>',0};
1276 HRESULT hr;
1277
1278 /* check if we have "<?xml " */
1279 if (reader_cmp(reader, xmldeclW)) return S_FALSE;
1280
1281 reader_skipn(reader, 5);
1282 hr = reader_parse_versioninfo(reader);
1283 if (FAILED(hr))
1284 return hr;
1285
1286 hr = reader_parse_encdecl(reader);
1287 if (FAILED(hr))
1288 return hr;
1289
1290 hr = reader_parse_sddecl(reader);
1291 if (FAILED(hr))
1292 return hr;
1293
1294 reader_skipspaces(reader);
1295 if (reader_cmp(reader, declcloseW)) return WC_E_XMLDECL;
1296 reader_skipn(reader, 2);
1297
1298 reader_inc_depth(reader);
1299 reader->nodetype = XmlNodeType_XmlDeclaration;
1300 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1301 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1302 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1303
1304 return S_OK;
1305 }
1306
1307 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1308 static HRESULT reader_parse_comment(xmlreader *reader)
1309 {
1310 WCHAR *ptr;
1311 UINT start;
1312
1313 if (reader->resumestate == XmlReadResumeState_Comment)
1314 {
1315 start = reader->resume[XmlReadResume_Body];
1316 ptr = reader_get_ptr(reader);
1317 }
1318 else
1319 {
1320 /* skip '<!--' */
1321 reader_skipn(reader, 4);
1322 reader_shrink(reader);
1323 ptr = reader_get_ptr(reader);
1324 start = reader_get_cur(reader);
1325 reader->nodetype = XmlNodeType_Comment;
1326 reader->resume[XmlReadResume_Body] = start;
1327 reader->resumestate = XmlReadResumeState_Comment;
1328 reader_set_strvalue(reader, StringValue_LocalName, NULL);
1329 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
1330 reader_set_strvalue(reader, StringValue_Value, NULL);
1331 }
1332
1333 /* will exit when there's no more data, it won't attempt to
1334 read more from stream */
1335 while (*ptr)
1336 {
1337 if (ptr[0] == '-')
1338 {
1339 if (ptr[1] == '-')
1340 {
1341 if (ptr[2] == '>')
1342 {
1343 strval value;
1344
1345 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1346 TRACE("%s\n", debug_strval(reader, &value));
1347
1348 /* skip rest of markup '->' */
1349 reader_skipn(reader, 3);
1350
1351 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1352 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1353 reader_set_strvalue(reader, StringValue_Value, &value);
1354 reader->resume[XmlReadResume_Body] = 0;
1355 reader->resumestate = XmlReadResumeState_Initial;
1356 return S_OK;
1357 }
1358 else
1359 return WC_E_COMMENT;
1360 }
1361 }
1362
1363 reader_skipn(reader, 1);
1364 ptr++;
1365 }
1366
1367 return S_OK;
1368 }
1369
1370 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1371 static inline BOOL is_char(WCHAR ch)
1372 {
1373 return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
1374 (ch >= 0x20 && ch <= 0xd7ff) ||
1375 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1376 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1377 (ch >= 0xe000 && ch <= 0xfffd);
1378 }
1379
1380 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1381 static inline BOOL is_pubchar(WCHAR ch)
1382 {
1383 return (ch == ' ') ||
1384 (ch >= 'a' && ch <= 'z') ||
1385 (ch >= 'A' && ch <= 'Z') ||
1386 (ch >= '0' && ch <= '9') ||
1387 (ch >= '-' && ch <= ';') || /* '()*+,-./:; */
1388 (ch == '=') || (ch == '?') ||
1389 (ch == '@') || (ch == '!') ||
1390 (ch >= '#' && ch <= '%') || /* #$% */
1391 (ch == '_') || (ch == '\r') || (ch == '\n');
1392 }
1393
1394 static inline BOOL is_namestartchar(WCHAR ch)
1395 {
1396 return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
1397 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1398 (ch >= 0xc0 && ch <= 0xd6) ||
1399 (ch >= 0xd8 && ch <= 0xf6) ||
1400 (ch >= 0xf8 && ch <= 0x2ff) ||
1401 (ch >= 0x370 && ch <= 0x37d) ||
1402 (ch >= 0x37f && ch <= 0x1fff) ||
1403 (ch >= 0x200c && ch <= 0x200d) ||
1404 (ch >= 0x2070 && ch <= 0x218f) ||
1405 (ch >= 0x2c00 && ch <= 0x2fef) ||
1406 (ch >= 0x3001 && ch <= 0xd7ff) ||
1407 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1408 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1409 (ch >= 0xf900 && ch <= 0xfdcf) ||
1410 (ch >= 0xfdf0 && ch <= 0xfffd);
1411 }
1412
1413 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1414 static inline BOOL is_ncnamechar(WCHAR ch)
1415 {
1416 return (ch >= 'A' && ch <= 'Z') ||
1417 (ch == '_') || (ch >= 'a' && ch <= 'z') ||
1418 (ch == '-') || (ch == '.') ||
1419 (ch >= '0' && ch <= '9') ||
1420 (ch == 0xb7) ||
1421 (ch >= 0xc0 && ch <= 0xd6) ||
1422 (ch >= 0xd8 && ch <= 0xf6) ||
1423 (ch >= 0xf8 && ch <= 0x2ff) ||
1424 (ch >= 0x300 && ch <= 0x36f) ||
1425 (ch >= 0x370 && ch <= 0x37d) ||
1426 (ch >= 0x37f && ch <= 0x1fff) ||
1427 (ch >= 0x200c && ch <= 0x200d) ||
1428 (ch >= 0x203f && ch <= 0x2040) ||
1429 (ch >= 0x2070 && ch <= 0x218f) ||
1430 (ch >= 0x2c00 && ch <= 0x2fef) ||
1431 (ch >= 0x3001 && ch <= 0xd7ff) ||
1432 (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
1433 (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
1434 (ch >= 0xf900 && ch <= 0xfdcf) ||
1435 (ch >= 0xfdf0 && ch <= 0xfffd);
1436 }
1437
1438 static inline BOOL is_namechar(WCHAR ch)
1439 {
1440 return (ch == ':') || is_ncnamechar(ch);
1441 }
1442
1443 static XmlNodeType reader_get_nodetype(const xmlreader *reader)
1444 {
1445 /* When we're on attribute always return attribute type, container node type is kept.
1446 Note that container is not necessarily an element, and attribute doesn't mean it's
1447 an attribute in XML spec terms. */
1448 return reader->attr ? XmlNodeType_Attribute : reader->nodetype;
1449 }
1450
1451 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1452 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1453 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1454 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1455 [5] Name ::= NameStartChar (NameChar)* */
1456 static HRESULT reader_parse_name(xmlreader *reader, strval *name)
1457 {
1458 WCHAR *ptr;
1459 UINT start;
1460
1461 if (reader->resume[XmlReadResume_Name])
1462 {
1463 start = reader->resume[XmlReadResume_Name];
1464 ptr = reader_get_ptr(reader);
1465 }
1466 else
1467 {
1468 ptr = reader_get_ptr(reader);
1469 start = reader_get_cur(reader);
1470 if (!is_namestartchar(*ptr)) return WC_E_NAMECHARACTER;
1471 }
1472
1473 while (is_namechar(*ptr))
1474 {
1475 reader_skipn(reader, 1);
1476 ptr = reader_get_ptr(reader);
1477 }
1478
1479 if (is_reader_pending(reader))
1480 {
1481 reader->resume[XmlReadResume_Name] = start;
1482 return E_PENDING;
1483 }
1484 else
1485 reader->resume[XmlReadResume_Name] = 0;
1486
1487 reader_init_strvalue(start, reader_get_cur(reader)-start, name);
1488 TRACE("name %s:%d\n", debug_strval(reader, name), name->len);
1489
1490 return S_OK;
1491 }
1492
1493 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1494 static HRESULT reader_parse_pitarget(xmlreader *reader, strval *target)
1495 {
1496 static const WCHAR xmlW[] = {'x','m','l'};
1497 static const strval xmlval = { (WCHAR*)xmlW, 3 };
1498 strval name;
1499 WCHAR *ptr;
1500 HRESULT hr;
1501 UINT i;
1502
1503 hr = reader_parse_name(reader, &name);
1504 if (FAILED(hr)) return is_reader_pending(reader) ? E_PENDING : WC_E_PI;
1505
1506 /* now that we got name check for illegal content */
1507 if (strval_eq(reader, &name, &xmlval))
1508 return WC_E_LEADINGXML;
1509
1510 /* PITarget can't be a qualified name */
1511 ptr = reader_get_strptr(reader, &name);
1512 for (i = 0; i < name.len; i++)
1513 if (ptr[i] == ':')
1514 return i ? NC_E_NAMECOLON : WC_E_PI;
1515
1516 TRACE("pitarget %s:%d\n", debug_strval(reader, &name), name.len);
1517 *target = name;
1518 return S_OK;
1519 }
1520
1521 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1522 static HRESULT reader_parse_pi(xmlreader *reader)
1523 {
1524 strval target;
1525 WCHAR *ptr;
1526 UINT start;
1527 HRESULT hr;
1528
1529 switch (reader->resumestate)
1530 {
1531 case XmlReadResumeState_Initial:
1532 /* skip '<?' */
1533 reader_skipn(reader, 2);
1534 reader_shrink(reader);
1535 reader->resumestate = XmlReadResumeState_PITarget;
1536 case XmlReadResumeState_PITarget:
1537 hr = reader_parse_pitarget(reader, &target);
1538 if (FAILED(hr)) return hr;
1539 reader_set_strvalue(reader, StringValue_LocalName, &target);
1540 reader_set_strvalue(reader, StringValue_QualifiedName, &target);
1541 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1542 reader->resumestate = XmlReadResumeState_PIBody;
1543 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1544 default:
1545 ;
1546 }
1547
1548 start = reader->resume[XmlReadResume_Body];
1549 ptr = reader_get_ptr(reader);
1550 while (*ptr)
1551 {
1552 if (ptr[0] == '?')
1553 {
1554 if (ptr[1] == '>')
1555 {
1556 UINT cur = reader_get_cur(reader);
1557 strval value;
1558
1559 /* strip all leading whitespace chars */
1560 while (start < cur)
1561 {
1562 ptr = reader_get_ptr2(reader, start);
1563 if (!is_wchar_space(*ptr)) break;
1564 start++;
1565 }
1566
1567 reader_init_strvalue(start, cur-start, &value);
1568
1569 /* skip '?>' */
1570 reader_skipn(reader, 2);
1571 TRACE("%s\n", debug_strval(reader, &value));
1572 reader->nodetype = XmlNodeType_ProcessingInstruction;
1573 reader->resumestate = XmlReadResumeState_Initial;
1574 reader->resume[XmlReadResume_Body] = 0;
1575 reader_set_strvalue(reader, StringValue_Value, &value);
1576 return S_OK;
1577 }
1578 }
1579
1580 reader_skipn(reader, 1);
1581 ptr = reader_get_ptr(reader);
1582 }
1583
1584 return S_OK;
1585 }
1586
1587 /* This one is used to parse significant whitespace nodes, like in Misc production */
1588 static HRESULT reader_parse_whitespace(xmlreader *reader)
1589 {
1590 switch (reader->resumestate)
1591 {
1592 case XmlReadResumeState_Initial:
1593 reader_shrink(reader);
1594 reader->resumestate = XmlReadResumeState_Whitespace;
1595 reader->resume[XmlReadResume_Body] = reader_get_cur(reader);
1596 reader->nodetype = XmlNodeType_Whitespace;
1597 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
1598 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
1599 reader_set_strvalue(reader, StringValue_Value, &strval_empty);
1600 /* fallthrough */
1601 case XmlReadResumeState_Whitespace:
1602 {
1603 strval value;
1604 UINT start;
1605
1606 reader_skipspaces(reader);
1607 if (is_reader_pending(reader)) return S_OK;
1608
1609 start = reader->resume[XmlReadResume_Body];
1610 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
1611 reader_set_strvalue(reader, StringValue_Value, &value);
1612 TRACE("%s\n", debug_strval(reader, &value));
1613 reader->resumestate = XmlReadResumeState_Initial;
1614 }
1615 default:
1616 ;
1617 }
1618
1619 return S_OK;
1620 }
1621
1622 /* [27] Misc ::= Comment | PI | S */
1623 static HRESULT reader_parse_misc(xmlreader *reader)
1624 {
1625 HRESULT hr = S_FALSE;
1626
1627 if (reader->resumestate != XmlReadResumeState_Initial)
1628 {
1629 hr = reader_more(reader);
1630 if (FAILED(hr)) return hr;
1631
1632 /* finish current node */
1633 switch (reader->resumestate)
1634 {
1635 case XmlReadResumeState_PITarget:
1636 case XmlReadResumeState_PIBody:
1637 return reader_parse_pi(reader);
1638 case XmlReadResumeState_Comment:
1639 return reader_parse_comment(reader);
1640 case XmlReadResumeState_Whitespace:
1641 return reader_parse_whitespace(reader);
1642 default:
1643 ERR("unknown resume state %d\n", reader->resumestate);
1644 }
1645 }
1646
1647 while (1)
1648 {
1649 const WCHAR *cur = reader_get_ptr(reader);
1650
1651 if (is_wchar_space(*cur))
1652 hr = reader_parse_whitespace(reader);
1653 else if (!reader_cmp(reader, commentW))
1654 hr = reader_parse_comment(reader);
1655 else if (!reader_cmp(reader, piW))
1656 hr = reader_parse_pi(reader);
1657 else
1658 break;
1659
1660 if (hr != S_FALSE) return hr;
1661 }
1662
1663 return hr;
1664 }
1665
1666 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1667 static HRESULT reader_parse_sys_literal(xmlreader *reader, strval *literal)
1668 {
1669 WCHAR *cur = reader_get_ptr(reader), quote;
1670 UINT start;
1671
1672 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1673
1674 quote = *cur;
1675 reader_skipn(reader, 1);
1676
1677 cur = reader_get_ptr(reader);
1678 start = reader_get_cur(reader);
1679 while (is_char(*cur) && *cur != quote)
1680 {
1681 reader_skipn(reader, 1);
1682 cur = reader_get_ptr(reader);
1683 }
1684 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1685 if (*cur == quote) reader_skipn(reader, 1);
1686
1687 TRACE("%s\n", debug_strval(reader, literal));
1688 return S_OK;
1689 }
1690
1691 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1692 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1693 static HRESULT reader_parse_pub_literal(xmlreader *reader, strval *literal)
1694 {
1695 WCHAR *cur = reader_get_ptr(reader), quote;
1696 UINT start;
1697
1698 if (*cur != '"' && *cur != '\'') return WC_E_QUOTE;
1699
1700 quote = *cur;
1701 reader_skipn(reader, 1);
1702
1703 start = reader_get_cur(reader);
1704 cur = reader_get_ptr(reader);
1705 while (is_pubchar(*cur) && *cur != quote)
1706 {
1707 reader_skipn(reader, 1);
1708 cur = reader_get_ptr(reader);
1709 }
1710 reader_init_strvalue(start, reader_get_cur(reader)-start, literal);
1711 if (*cur == quote) reader_skipn(reader, 1);
1712
1713 TRACE("%s\n", debug_strval(reader, literal));
1714 return S_OK;
1715 }
1716
1717 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1718 static HRESULT reader_parse_externalid(xmlreader *reader)
1719 {
1720 static WCHAR systemW[] = {'S','Y','S','T','E','M',0};
1721 static WCHAR publicW[] = {'P','U','B','L','I','C',0};
1722 strval name, sys;
1723 HRESULT hr;
1724 int cnt;
1725
1726 if (!reader_cmp(reader, publicW)) {
1727 strval pub;
1728
1729 /* public id */
1730 reader_skipn(reader, 6);
1731 cnt = reader_skipspaces(reader);
1732 if (!cnt) return WC_E_WHITESPACE;
1733
1734 hr = reader_parse_pub_literal(reader, &pub);
1735 if (FAILED(hr)) return hr;
1736
1737 reader_init_cstrvalue(publicW, strlenW(publicW), &name);
1738 hr = reader_add_attr(reader, NULL, &name, &pub);
1739 if (FAILED(hr)) return hr;
1740
1741 cnt = reader_skipspaces(reader);
1742 if (!cnt) return S_OK;
1743
1744 /* optional system id */
1745 hr = reader_parse_sys_literal(reader, &sys);
1746 if (FAILED(hr)) return S_OK;
1747
1748 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1749 hr = reader_add_attr(reader, NULL, &name, &sys);
1750 if (FAILED(hr)) return hr;
1751
1752 return S_OK;
1753 } else if (!reader_cmp(reader, systemW)) {
1754 /* system id */
1755 reader_skipn(reader, 6);
1756 cnt = reader_skipspaces(reader);
1757 if (!cnt) return WC_E_WHITESPACE;
1758
1759 hr = reader_parse_sys_literal(reader, &sys);
1760 if (FAILED(hr)) return hr;
1761
1762 reader_init_cstrvalue(systemW, strlenW(systemW), &name);
1763 return reader_add_attr(reader, NULL, &name, &sys);
1764 }
1765
1766 return S_FALSE;
1767 }
1768
1769 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1770 static HRESULT reader_parse_dtd(xmlreader *reader)
1771 {
1772 static const WCHAR doctypeW[] = {'<','!','D','O','C','T','Y','P','E',0};
1773 strval name;
1774 WCHAR *cur;
1775 HRESULT hr;
1776
1777 /* check if we have "<!DOCTYPE" */
1778 if (reader_cmp(reader, doctypeW)) return S_FALSE;
1779 reader_shrink(reader);
1780
1781 /* DTD processing is not allowed by default */
1782 if (reader->dtdmode == DtdProcessing_Prohibit) return WC_E_DTDPROHIBITED;
1783
1784 reader_skipn(reader, 9);
1785 if (!reader_skipspaces(reader)) return WC_E_WHITESPACE;
1786
1787 /* name */
1788 hr = reader_parse_name(reader, &name);
1789 if (FAILED(hr)) return WC_E_DECLDOCTYPE;
1790
1791 reader_skipspaces(reader);
1792
1793 hr = reader_parse_externalid(reader);
1794 if (FAILED(hr)) return hr;
1795
1796 reader_skipspaces(reader);
1797
1798 cur = reader_get_ptr(reader);
1799 if (*cur != '>')
1800 {
1801 FIXME("internal subset parsing not implemented\n");
1802 return E_NOTIMPL;
1803 }
1804
1805 /* skip '>' */
1806 reader_skipn(reader, 1);
1807
1808 reader->nodetype = XmlNodeType_DocumentType;
1809 reader_set_strvalue(reader, StringValue_LocalName, &name);
1810 reader_set_strvalue(reader, StringValue_QualifiedName, &name);
1811
1812 return S_OK;
1813 }
1814
1815 /* [11 NS] LocalPart ::= NCName */
1816 static HRESULT reader_parse_local(xmlreader *reader, strval *local)
1817 {
1818 WCHAR *ptr;
1819 UINT start;
1820
1821 if (reader->resume[XmlReadResume_Local])
1822 {
1823 start = reader->resume[XmlReadResume_Local];
1824 ptr = reader_get_ptr(reader);
1825 }
1826 else
1827 {
1828 ptr = reader_get_ptr(reader);
1829 start = reader_get_cur(reader);
1830 }
1831
1832 while (is_ncnamechar(*ptr))
1833 {
1834 reader_skipn(reader, 1);
1835 ptr = reader_get_ptr(reader);
1836 }
1837
1838 if (is_reader_pending(reader))
1839 {
1840 reader->resume[XmlReadResume_Local] = start;
1841 return E_PENDING;
1842 }
1843 else
1844 reader->resume[XmlReadResume_Local] = 0;
1845
1846 reader_init_strvalue(start, reader_get_cur(reader)-start, local);
1847
1848 return S_OK;
1849 }
1850
1851 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1852 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1853 [9 NS] UnprefixedName ::= LocalPart
1854 [10 NS] Prefix ::= NCName */
1855 static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *local, strval *qname)
1856 {
1857 WCHAR *ptr;
1858 UINT start;
1859 HRESULT hr;
1860
1861 if (reader->resume[XmlReadResume_Name])
1862 {
1863 start = reader->resume[XmlReadResume_Name];
1864 ptr = reader_get_ptr(reader);
1865 }
1866 else
1867 {
1868 ptr = reader_get_ptr(reader);
1869 start = reader_get_cur(reader);
1870 reader->resume[XmlReadResume_Name] = start;
1871 if (!is_ncnamechar(*ptr)) return NC_E_QNAMECHARACTER;
1872 }
1873
1874 if (reader->resume[XmlReadResume_Local])
1875 {
1876 hr = reader_parse_local(reader, local);
1877 if (FAILED(hr)) return hr;
1878
1879 reader_init_strvalue(reader->resume[XmlReadResume_Name],
1880 local->start - reader->resume[XmlReadResume_Name] - 1,
1881 prefix);
1882 }
1883 else
1884 {
1885 /* skip prefix part */
1886 while (is_ncnamechar(*ptr))
1887 {
1888 reader_skipn(reader, 1);
1889 ptr = reader_get_ptr(reader);
1890 }
1891
1892 if (is_reader_pending(reader)) return E_PENDING;
1893
1894 /* got a qualified name */
1895 if (*ptr == ':')
1896 {
1897 reader_init_strvalue(start, reader_get_cur(reader)-start, prefix);
1898
1899 /* skip ':' */
1900 reader_skipn(reader, 1);
1901 hr = reader_parse_local(reader, local);
1902 if (FAILED(hr)) return hr;
1903 }
1904 else
1905 {
1906 reader_init_strvalue(reader->resume[XmlReadResume_Name], reader_get_cur(reader)-reader->resume[XmlReadResume_Name], local);
1907 reader_init_strvalue(0, 0, prefix);
1908 }
1909 }
1910
1911 if (prefix->len)
1912 TRACE("qname %s:%s\n", debug_strval(reader, prefix), debug_strval(reader, local));
1913 else
1914 TRACE("ncname %s\n", debug_strval(reader, local));
1915
1916 reader_init_strvalue(prefix->len ? prefix->start : local->start,
1917 /* count ':' too */
1918 (prefix->len ? prefix->len + 1 : 0) + local->len,
1919 qname);
1920
1921 reader->resume[XmlReadResume_Name] = 0;
1922 reader->resume[XmlReadResume_Local] = 0;
1923
1924 return S_OK;
1925 }
1926
1927 /* Applies normalization rules to a single char, used for attribute values.
1928
1929 Rules include 2 steps:
1930
1931 1) replacing \r\n with a single \n;
1932 2) replacing all whitespace chars with ' '.
1933
1934 */
1935 static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
1936 {
1937 encoded_buffer *buffer = &reader->input->buffer->utf16;
1938
1939 if (!is_wchar_space(*ptr)) return;
1940
1941 if (*ptr == '\r' && *(ptr+1) == '\n')
1942 {
1943 int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
1944 memmove(ptr+1, ptr+2, len);
1945 }
1946 *ptr = ' ';
1947 }
1948
1949 static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
1950 {
1951 static const WCHAR entltW[] = {'l','t'};
1952 static const WCHAR entgtW[] = {'g','t'};
1953 static const WCHAR entampW[] = {'a','m','p'};
1954 static const WCHAR entaposW[] = {'a','p','o','s'};
1955 static const WCHAR entquotW[] = {'q','u','o','t'};
1956 static const strval lt = { (WCHAR*)entltW, 2 };
1957 static const strval gt = { (WCHAR*)entgtW, 2 };
1958 static const strval amp = { (WCHAR*)entampW, 3 };
1959 static const strval apos = { (WCHAR*)entaposW, 4 };
1960 static const strval quot = { (WCHAR*)entquotW, 4 };
1961 WCHAR *str = reader_get_strptr(reader, name);
1962
1963 switch (*str)
1964 {
1965 case 'l':
1966 if (strval_eq(reader, name, &lt)) return '<';
1967 break;
1968 case 'g':
1969 if (strval_eq(reader, name, &gt)) return '>';
1970 break;
1971 case 'a':
1972 if (strval_eq(reader, name, &amp))
1973 return '&';
1974 else if (strval_eq(reader, name, &apos))
1975 return '\'';
1976 break;
1977 case 'q':
1978 if (strval_eq(reader, name, &quot)) return '\"';
1979 break;
1980 default:
1981 ;
1982 }
1983
1984 return 0;
1985 }
1986
1987 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1988 [67] Reference ::= EntityRef | CharRef
1989 [68] EntityRef ::= '&' Name ';' */
1990 static HRESULT reader_parse_reference(xmlreader *reader)
1991 {
1992 encoded_buffer *buffer = &reader->input->buffer->utf16;
1993 WCHAR *start = reader_get_ptr(reader), *ptr;
1994 UINT cur = reader_get_cur(reader);
1995 WCHAR ch = 0;
1996 int len;
1997
1998 /* skip '&' */
1999 reader_skipn(reader, 1);
2000 ptr = reader_get_ptr(reader);
2001
2002 if (*ptr == '#')
2003 {
2004 reader_skipn(reader, 1);
2005 ptr = reader_get_ptr(reader);
2006
2007 /* hex char or decimal */
2008 if (*ptr == 'x')
2009 {
2010 reader_skipn(reader, 1);
2011 ptr = reader_get_ptr(reader);
2012
2013 while (*ptr != ';')
2014 {
2015 if ((*ptr >= '0' && *ptr <= '9'))
2016 ch = ch*16 + *ptr - '0';
2017 else if ((*ptr >= 'a' && *ptr <= 'f'))
2018 ch = ch*16 + *ptr - 'a' + 10;
2019 else if ((*ptr >= 'A' && *ptr <= 'F'))
2020 ch = ch*16 + *ptr - 'A' + 10;
2021 else
2022 return ch ? WC_E_SEMICOLON : WC_E_HEXDIGIT;
2023 reader_skipn(reader, 1);
2024 ptr = reader_get_ptr(reader);
2025 }
2026 }
2027 else
2028 {
2029 while (*ptr != ';')
2030 {
2031 if ((*ptr >= '0' && *ptr <= '9'))
2032 {
2033 ch = ch*10 + *ptr - '0';
2034 reader_skipn(reader, 1);
2035 ptr = reader_get_ptr(reader);
2036 }
2037 else
2038 return ch ? WC_E_SEMICOLON : WC_E_DIGIT;
2039 }
2040 }
2041
2042 if (!is_char(ch)) return WC_E_XMLCHARACTER;
2043
2044 /* normalize */
2045 if (is_wchar_space(ch)) ch = ' ';
2046
2047 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2048 memmove(start+1, ptr+1, len);
2049 buffer->cur = cur + 1;
2050
2051 *start = ch;
2052 }
2053 else
2054 {
2055 strval name;
2056 HRESULT hr;
2057
2058 hr = reader_parse_name(reader, &name);
2059 if (FAILED(hr)) return hr;
2060
2061 ptr = reader_get_ptr(reader);
2062 if (*ptr != ';') return WC_E_SEMICOLON;
2063
2064 /* predefined entities resolve to a single character */
2065 ch = get_predefined_entity(reader, &name);
2066 if (ch)
2067 {
2068 len = buffer->written - ((char*)ptr - buffer->data) - sizeof(WCHAR);
2069 memmove(start+1, ptr+1, len);
2070 buffer->cur = cur + 1;
2071
2072 *start = ch;
2073 }
2074 else
2075 {
2076 FIXME("undeclared entity %s\n", debug_strval(reader, &name));
2077 return WC_E_UNDECLAREDENTITY;
2078 }
2079
2080 }
2081
2082 return S_OK;
2083 }
2084
2085 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2086 static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
2087 {
2088 WCHAR *ptr, quote;
2089 UINT start;
2090
2091 ptr = reader_get_ptr(reader);
2092
2093 /* skip opening quote */
2094 quote = *ptr;
2095 if (quote != '\"' && quote != '\'') return WC_E_QUOTE;
2096 reader_skipn(reader, 1);
2097
2098 ptr = reader_get_ptr(reader);
2099 start = reader_get_cur(reader);
2100 while (*ptr)
2101 {
2102 if (*ptr == '<') return WC_E_LESSTHAN;
2103
2104 if (*ptr == quote)
2105 {
2106 reader_init_strvalue(start, reader_get_cur(reader)-start, value);
2107 /* skip closing quote */
2108 reader_skipn(reader, 1);
2109 return S_OK;
2110 }
2111
2112 if (*ptr == '&')
2113 {
2114 HRESULT hr = reader_parse_reference(reader);
2115 if (FAILED(hr)) return hr;
2116 }
2117 else
2118 {
2119 reader_normalize_space(reader, ptr);
2120 reader_skipn(reader, 1);
2121 }
2122 ptr = reader_get_ptr(reader);
2123 }
2124
2125 return WC_E_QUOTE;
2126 }
2127
2128 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2129 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2130 [3 NS] DefaultAttName ::= 'xmlns'
2131 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2132 static HRESULT reader_parse_attribute(xmlreader *reader)
2133 {
2134 strval prefix, local, qname, value;
2135 BOOL ns = FALSE, nsdef = FALSE;
2136 HRESULT hr;
2137
2138 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2139 if (FAILED(hr)) return hr;
2140
2141 if (strval_eq(reader, &prefix, &strval_xmlns))
2142 ns = TRUE;
2143
2144 if (strval_eq(reader, &qname, &strval_xmlns))
2145 ns = nsdef = TRUE;
2146
2147 hr = reader_parse_eq(reader);
2148 if (FAILED(hr)) return hr;
2149
2150 hr = reader_parse_attvalue(reader, &value);
2151 if (FAILED(hr)) return hr;
2152
2153 if (ns)
2154 reader_push_ns(reader, nsdef ? &strval_xmlns : &local, &value, nsdef);
2155
2156 TRACE("%s=%s\n", debug_strval(reader, &local), debug_strval(reader, &value));
2157 return reader_add_attr(reader, &prefix, &local, &value);
2158 }
2159
2160 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2161 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2162 static HRESULT reader_parse_stag(xmlreader *reader, strval *prefix, strval *local, strval *qname, int *empty)
2163 {
2164 HRESULT hr;
2165
2166 hr = reader_parse_qname(reader, prefix, local, qname);
2167 if (FAILED(hr)) return hr;
2168
2169 while (1)
2170 {
2171 static const WCHAR endW[] = {'/','>',0};
2172
2173 reader_skipspaces(reader);
2174
2175 /* empty element */
2176 if ((*empty = !reader_cmp(reader, endW)))
2177 {
2178 /* skip '/>' */
2179 reader_skipn(reader, 2);
2180 reader->is_empty_element = TRUE;
2181 reader->empty_element.prefix = *prefix;
2182 reader->empty_element.localname = *local;
2183 reader->empty_element.qname = *qname;
2184 reader_mark_ns_nodes(reader, &reader->empty_element);
2185 return S_OK;
2186 }
2187
2188 /* got a start tag */
2189 if (!reader_cmp(reader, gtW))
2190 {
2191 /* skip '>' */
2192 reader_skipn(reader, 1);
2193 return reader_push_element(reader, prefix, local, qname);
2194 }
2195
2196 hr = reader_parse_attribute(reader);
2197 if (FAILED(hr)) return hr;
2198 }
2199
2200 return S_OK;
2201 }
2202
2203 /* [39] element ::= EmptyElemTag | STag content ETag */
2204 static HRESULT reader_parse_element(xmlreader *reader)
2205 {
2206 HRESULT hr;
2207
2208 switch (reader->resumestate)
2209 {
2210 case XmlReadResumeState_Initial:
2211 /* check if we are really on element */
2212 if (reader_cmp(reader, ltW)) return S_FALSE;
2213
2214 /* skip '<' */
2215 reader_skipn(reader, 1);
2216
2217 reader_shrink(reader);
2218 reader->resumestate = XmlReadResumeState_STag;
2219 case XmlReadResumeState_STag:
2220 {
2221 strval qname, prefix, local;
2222 int empty = 0;
2223
2224 /* this handles empty elements too */
2225 hr = reader_parse_stag(reader, &prefix, &local, &qname, &empty);
2226 if (FAILED(hr)) return hr;
2227
2228 /* FIXME: need to check for defined namespace to reject invalid prefix */
2229
2230 /* if we got empty element and stack is empty go straight to Misc */
2231 if (empty && list_empty(&reader->elements))
2232 reader->instate = XmlReadInState_MiscEnd;
2233 else
2234 reader->instate = XmlReadInState_Content;
2235
2236 reader->nodetype = XmlNodeType_Element;
2237 reader->resumestate = XmlReadResumeState_Initial;
2238 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2239 reader_set_strvalue(reader, StringValue_LocalName, &local);
2240 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2241 break;
2242 }
2243 default:
2244 hr = E_FAIL;
2245 }
2246
2247 return hr;
2248 }
2249
2250 /* [13 NS] ETag ::= '</' QName S? '>' */
2251 static HRESULT reader_parse_endtag(xmlreader *reader)
2252 {
2253 strval prefix, local, qname;
2254 struct element *elem;
2255 HRESULT hr;
2256
2257 /* skip '</' */
2258 reader_skipn(reader, 2);
2259
2260 hr = reader_parse_qname(reader, &prefix, &local, &qname);
2261 if (FAILED(hr)) return hr;
2262
2263 reader_skipspaces(reader);
2264
2265 if (reader_cmp(reader, gtW)) return WC_E_GREATERTHAN;
2266
2267 /* skip '>' */
2268 reader_skipn(reader, 1);
2269
2270 /* Element stack should never be empty at this point, cause we shouldn't get to
2271 content parsing if it's empty. */
2272 elem = LIST_ENTRY(list_head(&reader->elements), struct element, entry);
2273 if (!strval_eq(reader, &elem->qname, &qname)) return WC_E_ELEMENTMATCH;
2274
2275 reader->nodetype = XmlNodeType_EndElement;
2276 reader_set_strvalue(reader, StringValue_Prefix, &prefix);
2277 reader_set_strvalue(reader, StringValue_LocalName, &local);
2278 reader_set_strvalue(reader, StringValue_QualifiedName, &qname);
2279
2280 return S_OK;
2281 }
2282
2283 /* [18] CDSect ::= CDStart CData CDEnd
2284 [19] CDStart ::= '<![CDATA['
2285 [20] CData ::= (Char* - (Char* ']]>' Char*))
2286 [21] CDEnd ::= ']]>' */
2287 static HRESULT reader_parse_cdata(xmlreader *reader)
2288 {
2289 WCHAR *ptr;
2290 UINT start;
2291
2292 if (reader->resumestate == XmlReadResumeState_CDATA)
2293 {
2294 start = reader->resume[XmlReadResume_Body];
2295 ptr = reader_get_ptr(reader);
2296 }
2297 else
2298 {
2299 /* skip markup '<![CDATA[' */
2300 reader_skipn(reader, 9);
2301 reader_shrink(reader);
2302 ptr = reader_get_ptr(reader);
2303 start = reader_get_cur(reader);
2304 reader->nodetype = XmlNodeType_CDATA;
2305 reader->resume[XmlReadResume_Body] = start;
2306 reader->resumestate = XmlReadResumeState_CDATA;
2307 reader_set_strvalue(reader, StringValue_LocalName, NULL);
2308 reader_set_strvalue(reader, StringValue_QualifiedName, NULL);
2309 reader_set_strvalue(reader, StringValue_Value, NULL);
2310 }
2311
2312 while (*ptr)
2313 {
2314 if (*ptr == ']' && *(ptr+1) == ']' && *(ptr+2) == '>')
2315 {
2316 strval value;
2317
2318 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2319
2320 /* skip ']]>' */
2321 reader_skipn(reader, 3);
2322 TRACE("%s\n", debug_strval(reader, &value));
2323
2324 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2325 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2326 reader_set_strvalue(reader, StringValue_Value, &value);
2327 reader->resume[XmlReadResume_Body] = 0;
2328 reader->resumestate = XmlReadResumeState_Initial;
2329 return S_OK;
2330 }
2331 else
2332 {
2333 /* Value normalization is not fully implemented, rules are:
2334
2335 - single '\r' -> '\n';
2336 - sequence '\r\n' -> '\n', in this case value length changes;
2337 */
2338 if (*ptr == '\r') *ptr = '\n';
2339 reader_skipn(reader, 1);
2340 ptr++;
2341 }
2342 }
2343
2344 return S_OK;
2345 }
2346
2347 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2348 static HRESULT reader_parse_chardata(xmlreader *reader)
2349 {
2350 WCHAR *ptr;
2351 UINT start;
2352
2353 if (reader->resumestate == XmlReadResumeState_CharData)
2354 {
2355 start = reader->resume[XmlReadResume_Body];
2356 ptr = reader_get_ptr(reader);
2357 }
2358 else
2359 {
2360 reader_shrink(reader);
2361 ptr = reader_get_ptr(reader);
2362 start = reader_get_cur(reader);
2363 /* There's no text */
2364 if (!*ptr || *ptr == '<') return S_OK;
2365 reader->nodetype = is_wchar_space(*ptr) ? XmlNodeType_Whitespace : XmlNodeType_Text;
2366 reader->resume[XmlReadResume_Body] = start;
2367 reader->resumestate = XmlReadResumeState_CharData;
2368 reader_set_strvalue(reader, StringValue_LocalName, &strval_empty);
2369 reader_set_strvalue(reader, StringValue_QualifiedName, &strval_empty);
2370 reader_set_strvalue(reader, StringValue_Value, NULL);
2371 }
2372
2373 while (*ptr)
2374 {
2375 static const WCHAR ampW[] = {'&',0};
2376
2377 /* CDATA closing sequence ']]>' is not allowed */
2378 if (ptr[0] == ']' && ptr[1] == ']' && ptr[2] == '>')
2379 return WC_E_CDSECTEND;
2380
2381 /* Found next markup part */
2382 if (ptr[0] == '<')
2383 {
2384 strval value;
2385
2386 reader_init_strvalue(start, reader_get_cur(reader)-start, &value);
2387 reader_set_strvalue(reader, StringValue_Value, &value);
2388 reader->resume[XmlReadResume_Body] = 0;
2389 reader->resumestate = XmlReadResumeState_Initial;
2390 return S_OK;
2391 }
2392
2393 /* this covers a case when text has leading whitespace chars */
2394 if (!is_wchar_space(*ptr)) reader->nodetype = XmlNodeType_Text;
2395
2396 if (!reader_cmp(reader, ampW))
2397 reader_parse_reference(reader);
2398 else
2399 reader_skipn(reader, 1);
2400
2401 ptr = reader_get_ptr(reader);
2402 }
2403
2404 return S_OK;
2405 }
2406
2407 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2408 static HRESULT reader_parse_content(xmlreader *reader)
2409 {
2410 static const WCHAR cdstartW[] = {'<','!','[','C','D','A','T','A','[',0};
2411 static const WCHAR etagW[] = {'<','/',0};
2412
2413 if (reader->resumestate != XmlReadResumeState_Initial)
2414 {
2415 switch (reader->resumestate)
2416 {
2417 case XmlReadResumeState_CDATA:
2418 return reader_parse_cdata(reader);
2419 case XmlReadResumeState_Comment:
2420 return reader_parse_comment(reader);
2421 case XmlReadResumeState_PIBody:
2422 case XmlReadResumeState_PITarget:
2423 return reader_parse_pi(reader);
2424 case XmlReadResumeState_CharData:
2425 return reader_parse_chardata(reader);
2426 default:
2427 ERR("unknown resume state %d\n", reader->resumestate);
2428 }
2429 }
2430
2431 reader_shrink(reader);
2432
2433 /* handle end tag here, it indicates end of content as well */
2434 if (!reader_cmp(reader, etagW))
2435 return reader_parse_endtag(reader);
2436
2437 if (!reader_cmp(reader, commentW))
2438 return reader_parse_comment(reader);
2439
2440 if (!reader_cmp(reader, piW))
2441 return reader_parse_pi(reader);
2442
2443 if (!reader_cmp(reader, cdstartW))
2444 return reader_parse_cdata(reader);
2445
2446 if (!reader_cmp(reader, ltW))
2447 return reader_parse_element(reader);
2448
2449 /* what's left must be CharData */
2450 return reader_parse_chardata(reader);
2451 }
2452
2453 static HRESULT reader_parse_nextnode(xmlreader *reader)
2454 {
2455 XmlNodeType nodetype = reader_get_nodetype(reader);
2456 HRESULT hr;
2457
2458 if (!is_reader_pending(reader))
2459 reader_clear_attrs(reader);
2460
2461 /* When moving from EndElement or empty element, pop its own namespace definitions */
2462 if (nodetype == XmlNodeType_Element && reader->is_empty_element)
2463 reader_pop_ns_nodes(reader, &reader->empty_element);
2464 else if (nodetype == XmlNodeType_EndElement)
2465 reader_pop_element(reader);
2466
2467 while (1)
2468 {
2469 switch (reader->instate)
2470 {
2471 /* if it's a first call for a new input we need to detect stream encoding */
2472 case XmlReadInState_Initial:
2473 {
2474 xml_encoding enc;
2475
2476 hr = readerinput_growraw(reader->input);
2477 if (FAILED(hr)) return hr;
2478
2479 /* try to detect encoding by BOM or data and set input code page */
2480 hr = readerinput_detectencoding(reader->input, &enc);
2481 TRACE("detected encoding %s, 0x%08x\n", debugstr_w(xml_encoding_map[enc].name), hr);
2482 if (FAILED(hr)) return hr;
2483
2484 /* always switch first time cause we have to put something in */
2485 readerinput_switchencoding(reader->input, enc);
2486
2487 /* parse xml declaration */
2488 hr = reader_parse_xmldecl(reader);
2489 if (FAILED(hr)) return hr;
2490
2491 readerinput_shrinkraw(reader->input, -1);
2492 reader->instate = XmlReadInState_Misc_DTD;
2493 if (hr == S_OK) return hr;
2494 }
2495 break;
2496 case XmlReadInState_Misc_DTD:
2497 hr = reader_parse_misc(reader);
2498 if (FAILED(hr)) return hr;
2499
2500 if (hr == S_FALSE)
2501 reader->instate = XmlReadInState_DTD;
2502 else
2503 return hr;
2504 break;
2505 case XmlReadInState_DTD:
2506 hr = reader_parse_dtd(reader);
2507 if (FAILED(hr)) return hr;
2508
2509 if (hr == S_OK)
2510 {
2511 reader->instate = XmlReadInState_DTD_Misc;
2512 return hr;
2513 }
2514 else
2515 reader->instate = XmlReadInState_Element;
2516 break;
2517 case XmlReadInState_DTD_Misc:
2518 hr = reader_parse_misc(reader);
2519 if (FAILED(hr)) return hr;
2520
2521 if (hr == S_FALSE)
2522 reader->instate = XmlReadInState_Element;
2523 else
2524 return hr;
2525 break;
2526 case XmlReadInState_Element:
2527 return reader_parse_element(reader);
2528 case XmlReadInState_Content:
2529 return reader_parse_content(reader);
2530 case XmlReadInState_MiscEnd:
2531 hr = reader_parse_misc(reader);
2532 if (FAILED(hr)) return hr;
2533
2534 if (hr == S_FALSE)
2535 reader->instate = XmlReadInState_Eof;
2536 return hr;
2537 case XmlReadInState_Eof:
2538 return S_FALSE;
2539 default:
2540 FIXME("internal state %d not handled\n", reader->instate);
2541 return E_NOTIMPL;
2542 }
2543 }
2544
2545 return E_NOTIMPL;
2546 }
2547
2548 static HRESULT WINAPI xmlreader_QueryInterface(IXmlReader *iface, REFIID riid, void** ppvObject)
2549 {
2550 xmlreader *This = impl_from_IXmlReader(iface);
2551
2552 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
2553
2554 if (IsEqualGUID(riid, &IID_IUnknown) ||
2555 IsEqualGUID(riid, &IID_IXmlReader))
2556 {
2557 *ppvObject = iface;
2558 }
2559 else
2560 {
2561 FIXME("interface %s not implemented\n", debugstr_guid(riid));
2562 *ppvObject = NULL;
2563 return E_NOINTERFACE;
2564 }
2565
2566 IXmlReader_AddRef(iface);
2567
2568 return S_OK;
2569 }
2570
2571 static ULONG WINAPI xmlreader_AddRef(IXmlReader *iface)
2572 {
2573 xmlreader *This = impl_from_IXmlReader(iface);
2574 ULONG ref = InterlockedIncrement(&This->ref);
2575 TRACE("(%p)->(%d)\n", This, ref);
2576 return ref;
2577 }
2578
2579 static void reader_clear_ns(xmlreader *reader)
2580 {
2581 struct ns *ns, *ns2;
2582
2583 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->ns, struct ns, entry) {
2584 reader_free_strvalued(reader, &ns->prefix);
2585 reader_free_strvalued(reader, &ns->uri);
2586 reader_free(reader, ns);
2587 }
2588
2589 LIST_FOR_EACH_ENTRY_SAFE(ns, ns2, &reader->nsdef, struct ns, entry) {
2590 reader_free_strvalued(reader, &ns->uri);
2591 reader_free(reader, ns);
2592 }
2593 }
2594
2595 static ULONG WINAPI xmlreader_Release(IXmlReader *iface)
2596 {
2597 xmlreader *This = impl_from_IXmlReader(iface);
2598 LONG ref = InterlockedDecrement(&This->ref);
2599
2600 TRACE("(%p)->(%d)\n", This, ref);
2601
2602 if (ref == 0)
2603 {
2604 IMalloc *imalloc = This->imalloc;
2605 if (This->input) IUnknown_Release(&This->input->IXmlReaderInput_iface);
2606 if (This->resolver) IXmlResolver_Release(This->resolver);
2607 if (This->mlang) IUnknown_Release(This->mlang);
2608 reader_clear_attrs(This);
2609 reader_clear_ns(This);
2610 reader_clear_elements(This);
2611 reader_free_strvalues(This);
2612 reader_free(This, This);
2613 if (imalloc) IMalloc_Release(imalloc);
2614 }
2615
2616 return ref;
2617 }
2618
2619 static HRESULT WINAPI xmlreader_SetInput(IXmlReader* iface, IUnknown *input)
2620 {
2621 xmlreader *This = impl_from_IXmlReader(iface);
2622 IXmlReaderInput *readerinput;
2623 HRESULT hr;
2624
2625 TRACE("(%p)->(%p)\n", This, input);
2626
2627 if (This->input)
2628 {
2629 readerinput_release_stream(This->input);
2630 IUnknown_Release(&This->input->IXmlReaderInput_iface);
2631 This->input = NULL;
2632 }
2633
2634 This->line = This->pos = 0;
2635 reader_clear_elements(This);
2636 This->depth = 0;
2637 This->resumestate = XmlReadResumeState_Initial;
2638 memset(This->resume, 0, sizeof(This->resume));
2639
2640 /* just reset current input */
2641 if (!input)
2642 {
2643 This->state = XmlReadState_Initial;
2644 return S_OK;
2645 }
2646
2647 /* now try IXmlReaderInput, ISequentialStream, IStream */
2648 hr = IUnknown_QueryInterface(input, &IID_IXmlReaderInput, (void**)&readerinput);
2649 if (hr == S_OK)
2650 {
2651 if (readerinput->lpVtbl == &xmlreaderinputvtbl)
2652 This->input = impl_from_IXmlReaderInput(readerinput);
2653 else
2654 {
2655 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2656 readerinput, readerinput->lpVtbl);
2657 IUnknown_Release(readerinput);
2658 return E_FAIL;
2659
2660 }
2661 }
2662
2663 if (hr != S_OK || !readerinput)
2664 {
2665 /* create IXmlReaderInput basing on supplied interface */
2666 hr = CreateXmlReaderInputWithEncodingName(input,
2667 This->imalloc, NULL, FALSE, NULL, &readerinput);
2668 if (hr != S_OK) return hr;
2669 This->input = impl_from_IXmlReaderInput(readerinput);
2670 }
2671
2672 /* set stream for supplied IXmlReaderInput */
2673 hr = readerinput_query_for_stream(This->input);
2674 if (hr == S_OK)
2675 {
2676 This->state = XmlReadState_Initial;
2677 This->instate = XmlReadInState_Initial;
2678 }
2679
2680 return hr;
2681 }
2682
2683 static HRESULT WINAPI xmlreader_GetProperty(IXmlReader* iface, UINT property, LONG_PTR *value)
2684 {
2685 xmlreader *This = impl_from_IXmlReader(iface);
2686
2687 TRACE("(%p)->(%s %p)\n", This, debugstr_reader_prop(property), value);
2688
2689 if (!value) return E_INVALIDARG;
2690
2691 switch (property)
2692 {
2693 case XmlReaderProperty_MultiLanguage:
2694 *value = (LONG_PTR)This->mlang;
2695 if (This->mlang)
2696 IUnknown_AddRef(This->mlang);
2697 break;
2698 case XmlReaderProperty_XmlResolver:
2699 *value = (LONG_PTR)This->resolver;
2700 if (This->resolver)
2701 IXmlResolver_AddRef(This->resolver);
2702 break;
2703 case XmlReaderProperty_DtdProcessing:
2704 *value = This->dtdmode;
2705 break;
2706 case XmlReaderProperty_ReadState:
2707 *value = This->state;
2708 break;
2709 default:
2710 FIXME("Unimplemented property (%u)\n", property);
2711 return E_NOTIMPL;
2712 }
2713
2714 return S_OK;
2715 }
2716
2717 static HRESULT WINAPI xmlreader_SetProperty(IXmlReader* iface, UINT property, LONG_PTR value)
2718 {
2719 xmlreader *This = impl_from_IXmlReader(iface);
2720
2721 TRACE("(%p)->(%s 0x%lx)\n", This, debugstr_reader_prop(property), value);
2722
2723 switch (property)
2724 {
2725 case XmlReaderProperty_MultiLanguage:
2726 if (This->mlang)
2727 IUnknown_Release(This->mlang);
2728 This->mlang = (IUnknown*)value;
2729 if (This->mlang)
2730 IUnknown_AddRef(This->mlang);
2731 if (This->mlang)
2732 FIXME("Ignoring MultiLanguage %p\n", This->mlang);
2733 break;
2734 case XmlReaderProperty_XmlResolver:
2735 if (This->resolver)
2736 IXmlResolver_Release(This->resolver);
2737 This->resolver = (IXmlResolver*)value;
2738 if (This->resolver)
2739 IXmlResolver_AddRef(This->resolver);
2740 break;
2741 case XmlReaderProperty_DtdProcessing:
2742 if (value < 0 || value > _DtdProcessing_Last) return E_INVALIDARG;
2743 This->dtdmode = value;
2744 break;
2745 case XmlReaderProperty_MaxElementDepth:
2746 FIXME("Ignoring MaxElementDepth %ld\n", value);
2747 break;
2748 default:
2749 FIXME("Unimplemented property (%u)\n", property);
2750 return E_NOTIMPL;
2751 }
2752
2753 return S_OK;
2754 }
2755
2756 static HRESULT WINAPI xmlreader_Read(IXmlReader* iface, XmlNodeType *nodetype)
2757 {
2758 xmlreader *This = impl_from_IXmlReader(iface);
2759 XmlNodeType oldtype = This->nodetype;
2760 HRESULT hr;
2761
2762 TRACE("(%p)->(%p)\n", This, nodetype);
2763
2764 if (This->state == XmlReadState_Closed) return S_FALSE;
2765
2766 hr = reader_parse_nextnode(This);
2767 if (oldtype == XmlNodeType_None && This->nodetype != oldtype)
2768 This->state = XmlReadState_Interactive;
2769 if (hr == S_OK)
2770 {
2771 TRACE("node type %s\n", debugstr_nodetype(This->nodetype));
2772 if (nodetype)
2773 *nodetype = This->nodetype;
2774 }
2775
2776 return hr;
2777 }
2778
2779 static HRESULT WINAPI xmlreader_GetNodeType(IXmlReader* iface, XmlNodeType *node_type)
2780 {
2781 xmlreader *This = impl_from_IXmlReader(iface);
2782
2783 TRACE("(%p)->(%p)\n", This, node_type);
2784
2785 if (!node_type)
2786 return E_INVALIDARG;
2787
2788 *node_type = reader_get_nodetype(This);
2789 return This->state == XmlReadState_Closed ? S_FALSE : S_OK;
2790 }
2791
2792 static HRESULT reader_move_to_first_attribute(xmlreader *reader)
2793 {
2794 if (!reader->attr_count)
2795 return S_FALSE;
2796
2797 reader->attr = LIST_ENTRY(list_head(&reader->attrs), struct attribute, entry);
2798 reader_set_strvalue(reader, StringValue_Prefix, &reader->attr->prefix);
2799 reader_set_strvalue(reader, StringValue_LocalName, &reader->attr->localname);
2800 reader_set_strvalue(reader, StringValue_Value, &reader->attr->value);
2801
2802 return S_OK;
2803 }
2804
2805 static HRESULT WINAPI xmlreader_MoveToFirstAttribute(IXmlReader* iface)
2806 {
2807 xmlreader *This = impl_from_IXmlReader(iface);
2808
2809 TRACE("(%p)\n", This);
2810
2811 return reader_move_to_first_attribute(This);
2812 }
2813
2814 static HRESULT WINAPI xmlreader_MoveToNextAttribute(IXmlReader* iface)
2815 {
2816 xmlreader *This = impl_from_IXmlReader(iface);
2817 const struct list *next;
2818
2819 TRACE("(%p)\n", This);
2820
2821 if (!This->attr_count) return S_FALSE;
2822
2823 if (!This->attr)
2824 return reader_move_to_first_attribute(This);
2825
2826 next = list_next(&This->attrs, &This->attr->entry);
2827 if (next)
2828 {
2829 This->attr = LIST_ENTRY(next, struct attribute, entry);
2830 reader_set_strvalue(This, StringValue_Prefix, &This->attr->prefix);
2831 reader_set_strvalue(This, StringValue_LocalName, &This->attr->localname);
2832 reader_set_strvalue(This, StringValue_Value, &This->attr->value);
2833 }
2834
2835 return next ? S_OK : S_FALSE;
2836 }
2837
2838 static HRESULT WINAPI xmlreader_MoveToAttributeByName(IXmlReader* iface,
2839 LPCWSTR local_name,
2840 LPCWSTR namespaceUri)
2841 {
2842 FIXME("(%p %p %p): stub\n", iface, local_name, namespaceUri);
2843 return E_NOTIMPL;
2844 }
2845
2846 static HRESULT WINAPI xmlreader_MoveToElement(IXmlReader* iface)
2847 {
2848 xmlreader *This = impl_from_IXmlReader(iface);
2849
2850 TRACE("(%p)\n", This);
2851
2852 if (!This->attr_count) return S_FALSE;
2853 This->attr = NULL;
2854
2855 /* FIXME: support other node types with 'attributes' like DTD */
2856 if (This->is_empty_element) {
2857 reader_set_strvalue(This, StringValue_LocalName, &This->empty_element.localname);
2858 reader_set_strvalue(This, StringValue_QualifiedName, &This->empty_element.qname);
2859 }
2860 else {
2861 struct element *element = LIST_ENTRY(list_head(&This->elements), struct element, entry);
2862 if (element) {
2863 reader_set_strvalue(This, StringValue_LocalName, &element->localname);
2864 reader_set_strvalue(This, StringValue_QualifiedName, &element->qname);
2865 }
2866 }
2867
2868 return S_OK;
2869 }
2870
2871 static HRESULT WINAPI xmlreader_GetQualifiedName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2872 {
2873 xmlreader *This = impl_from_IXmlReader(iface);
2874
2875 TRACE("(%p)->(%p %p)\n", This, name, len);
2876 *name = This->strvalues[StringValue_QualifiedName].str;
2877 if (len) *len = This->strvalues[StringValue_QualifiedName].len;
2878 return S_OK;
2879 }
2880
2881 static struct ns *reader_lookup_ns(xmlreader *reader, const strval *prefix)
2882 {
2883 struct list *nslist = prefix ? &reader->ns : &reader->nsdef;
2884 struct ns *ns;
2885
2886 LIST_FOR_EACH_ENTRY_REV(ns, nslist, struct ns, entry) {
2887 if (strval_eq(reader, prefix, &ns->prefix))
2888 return ns;
2889 }
2890
2891 return NULL;
2892 }
2893
2894 static struct ns *reader_lookup_nsdef(xmlreader *reader)
2895 {
2896 if (list_empty(&reader->nsdef))
2897 return NULL;
2898
2899 return LIST_ENTRY(list_head(&reader->nsdef), struct ns, entry);
2900 }
2901
2902 static HRESULT WINAPI xmlreader_GetNamespaceUri(IXmlReader* iface, const WCHAR **uri, UINT *len)
2903 {
2904 xmlreader *This = impl_from_IXmlReader(iface);
2905 const strval *prefix = &This->strvalues[StringValue_Prefix];
2906 XmlNodeType nodetype;
2907 struct ns *ns;
2908 UINT length;
2909
2910 TRACE("(%p %p %p)\n", iface, uri, len);
2911
2912 if (!len)
2913 len = &length;
2914
2915 *uri = NULL;
2916 *len = 0;
2917
2918 switch ((nodetype = reader_get_nodetype(This)))
2919 {
2920 case XmlNodeType_Attribute:
2921 {
2922 static const WCHAR xmlns_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2923 '2','0','0','0','/','x','m','l','n','s','/',0};
2924 static const WCHAR xml_uriW[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
2925 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
2926 const strval *local = &This->strvalues[StringValue_LocalName];
2927
2928 /* check for reserved prefixes first */
2929 if ((strval_eq(This, prefix, &strval_empty) && strval_eq(This, local, &strval_xmlns)) ||
2930 strval_eq(This, prefix, &strval_xmlns))
2931 {
2932 *uri = xmlns_uriW;
2933 *len = sizeof(xmlns_uriW)/sizeof(xmlns_uriW[0]) - 1;
2934 }
2935 else if (strval_eq(This, prefix, &strval_xml)) {
2936 *uri = xml_uriW;
2937 *len = sizeof(xml_uriW)/sizeof(xml_uriW[0]) - 1;
2938 }
2939
2940 if (!*uri) {
2941 ns = reader_lookup_ns(This, prefix);
2942 if (ns) {
2943 *uri = ns->uri.str;
2944 *len = ns->uri.len;
2945 }
2946 else {
2947 *uri = emptyW;
2948 *len = 0;
2949 }
2950 }
2951 }
2952 break;
2953 case XmlNodeType_Element:
2954 case XmlNodeType_EndElement:
2955 {
2956 ns = reader_lookup_ns(This, prefix);
2957
2958 /* pick top default ns if any */
2959 if (!ns)
2960 ns = reader_lookup_nsdef(This);
2961
2962 if (ns) {
2963 *uri = ns->uri.str;
2964 *len = ns->uri.len;
2965 }
2966 else {
2967 *uri = emptyW;
2968 *len = 0;
2969 }
2970 }
2971 break;
2972 default:
2973 FIXME("Unhandled node type %d\n", nodetype);
2974 return E_NOTIMPL;
2975 }
2976
2977 return S_OK;
2978 }
2979
2980 static HRESULT WINAPI xmlreader_GetLocalName(IXmlReader* iface, LPCWSTR *name, UINT *len)
2981 {
2982 xmlreader *This = impl_from_IXmlReader(iface);
2983
2984 TRACE("(%p)->(%p %p)\n", This, name, len);
2985 *name = This->strvalues[StringValue_LocalName].str;
2986 if (len) *len = This->strvalues[StringValue_LocalName].len;
2987 return S_OK;
2988 }
2989
2990 static HRESULT WINAPI xmlreader_GetPrefix(IXmlReader* iface, LPCWSTR *prefix, UINT *len)
2991 {
2992 xmlreader *This = impl_from_IXmlReader(iface);
2993
2994 TRACE("(%p)->(%p %p)\n", This, prefix, len);
2995 *prefix = This->strvalues[StringValue_Prefix].str;
2996 if (len) *len = This->strvalues[StringValue_Prefix].len;
2997 return S_OK;
2998 }
2999
3000 static BOOL is_namespace_definition(xmlreader *reader)
3001 {
3002 const strval *local = &reader->strvalues[StringValue_LocalName];
3003 const strval *prefix = &reader->strvalues[StringValue_Prefix];
3004
3005 if (reader_get_nodetype(reader) != XmlNodeType_Attribute)
3006 return FALSE;
3007
3008 return ((strval_eq(reader, prefix, &strval_empty) && strval_eq(reader, local, &strval_xmlns)) ||
3009 strval_eq(reader, prefix, &strval_xmlns));
3010 }
3011
3012 static HRESULT WINAPI xmlreader_GetValue(IXmlReader* iface, const WCHAR **value, UINT *len)
3013 {
3014 xmlreader *reader = impl_from_IXmlReader(iface);
3015 strval *val = &reader->strvalues[StringValue_Value];
3016
3017 TRACE("(%p)->(%p %p)\n", reader, value, len);
3018
3019 *value = NULL;
3020
3021 if ((reader->nodetype == XmlNodeType_Comment && !val->str) || is_reader_pending(reader))
3022 {
3023 XmlNodeType type;
3024 HRESULT hr;
3025
3026 hr = IXmlReader_Read(iface, &type);
3027 if (FAILED(hr)) return hr;
3028
3029 /* return if still pending, partially read values are not reported */
3030 if (is_reader_pending(reader)) return E_PENDING;
3031 }
3032
3033 if (!val->str)
3034 {
3035 WCHAR *ptr = reader_alloc(reader, (val->len+1)*sizeof(WCHAR));
3036 if (!ptr) return E_OUTOFMEMORY;
3037 memcpy(ptr, reader_get_strptr(reader, val), val->len*sizeof(WCHAR));
3038 ptr[val->len] = 0;
3039 val->str = ptr;
3040 }
3041
3042 /* For namespace definition attributes return values from namespace list */
3043 if (is_namespace_definition(reader)) {
3044 const strval *local = &reader->strvalues[StringValue_LocalName];
3045 struct ns *ns;
3046
3047 ns = reader_lookup_ns(reader, local);
3048 if (!ns)
3049 ns = reader_lookup_nsdef(reader);
3050
3051 val = &ns->uri;
3052 }
3053
3054 *value = val->str;
3055 if (len) *len = val->len;
3056 return S_OK;
3057 }
3058
3059 static HRESULT WINAPI xmlreader_ReadValueChunk(IXmlReader* iface, WCHAR *buffer, UINT chunk_size, UINT *read)
3060 {
3061 xmlreader *reader = impl_from_IXmlReader(iface);
3062 strval *val = &reader->strvalues[StringValue_Value];
3063 UINT len;
3064
3065 TRACE("(%p)->(%p %u %p)\n", reader, buffer, chunk_size, read);
3066
3067 /* Value is already allocated, chunked reads are not possible. */
3068 if (val->str) return S_FALSE;
3069
3070 if (val->len)
3071 {
3072 len = min(chunk_size, val->len);
3073 memcpy(buffer, reader_get_ptr2(reader, val->start), len);
3074 val->start += len;
3075 val->len -= len;
3076 if (read) *read = len;
3077 }
3078
3079 return S_OK;
3080 }
3081
3082 static HRESULT WINAPI xmlreader_GetBaseUri(IXmlReader* iface,
3083 LPCWSTR *baseUri,
3084 UINT *baseUri_length)
3085 {
3086 FIXME("(%p %p %p): stub\n", iface, baseUri, baseUri_length);
3087 return E_NOTIMPL;
3088 }
3089
3090 static BOOL WINAPI xmlreader_IsDefault(IXmlReader* iface)
3091 {
3092 FIXME("(%p): stub\n", iface);
3093 return FALSE;
3094 }
3095
3096 static BOOL WINAPI xmlreader_IsEmptyElement(IXmlReader* iface)
3097 {
3098 xmlreader *This = impl_from_IXmlReader(iface);
3099 TRACE("(%p)\n", This);
3100 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3101 when current node is start tag of an element */
3102 return (reader_get_nodetype(This) == XmlNodeType_Element) ? This->is_empty_element : FALSE;
3103 }
3104
3105 static HRESULT WINAPI xmlreader_GetLineNumber(IXmlReader* iface, UINT *lineNumber)
3106 {
3107 xmlreader *This = impl_from_IXmlReader(iface);
3108
3109 TRACE("(%p %p)\n", This, lineNumber);
3110
3111 if (!lineNumber) return E_INVALIDARG;
3112
3113 *lineNumber = This->line;
3114
3115 return S_OK;
3116 }
3117
3118 static HRESULT WINAPI xmlreader_GetLinePosition(IXmlReader* iface, UINT *linePosition)
3119 {
3120 xmlreader *This = impl_from_IXmlReader(iface);
3121
3122 TRACE("(%p %p)\n", This, linePosition);
3123
3124 if (!linePosition) return E_INVALIDARG;
3125
3126 *linePosition = This->pos;
3127
3128 return S_OK;
3129 }
3130
3131 static HRESULT WINAPI xmlreader_GetAttributeCount(IXmlReader* iface, UINT *count)
3132 {
3133 xmlreader *This = impl_from_IXmlReader(iface);
3134
3135 TRACE("(%p)->(%p)\n", This, count);
3136
3137 if (!count) return E_INVALIDARG;
3138
3139 *count = This->attr_count;
3140 return S_OK;
3141 }
3142
3143 static HRESULT WINAPI xmlreader_GetDepth(IXmlReader* iface, UINT *depth)
3144 {
3145 xmlreader *This = impl_from_IXmlReader(iface);
3146 TRACE("(%p)->(%p)\n", This, depth);
3147 *depth = This->depth;
3148 return S_OK;
3149 }
3150
3151 static BOOL WINAPI xmlreader_IsEOF(IXmlReader* iface)
3152 {
3153 FIXME("(%p): stub\n", iface);
3154 return FALSE;
3155 }
3156
3157 static const struct IXmlReaderVtbl xmlreader_vtbl =
3158 {
3159 xmlreader_QueryInterface,
3160 xmlreader_AddRef,
3161 xmlreader_Release,
3162 xmlreader_SetInput,
3163 xmlreader_GetProperty,
3164 xmlreader_SetProperty,
3165 xmlreader_Read,
3166 xmlreader_GetNodeType,
3167 xmlreader_MoveToFirstAttribute,
3168 xmlreader_MoveToNextAttribute,
3169 xmlreader_MoveToAttributeByName,
3170 xmlreader_MoveToElement,
3171 xmlreader_GetQualifiedName,
3172 xmlreader_GetNamespaceUri,
3173 xmlreader_GetLocalName,
3174 xmlreader_GetPrefix,
3175 xmlreader_GetValue,
3176 xmlreader_ReadValueChunk,
3177 xmlreader_GetBaseUri,
3178 xmlreader_IsDefault,
3179 xmlreader_IsEmptyElement,
3180 xmlreader_GetLineNumber,
3181 xmlreader_GetLinePosition,
3182 xmlreader_GetAttributeCount,
3183 xmlreader_GetDepth,
3184 xmlreader_IsEOF
3185 };
3186
3187 /** IXmlReaderInput **/
3188 static HRESULT WINAPI xmlreaderinput_QueryInterface(IXmlReaderInput *iface, REFIID riid, void** ppvObject)
3189 {
3190 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3191
3192 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppvObject);
3193
3194 if (IsEqualGUID(riid, &IID_IXmlReaderInput) ||
3195 IsEqualGUID(riid, &IID_IUnknown))
3196 {
3197 *ppvObject = iface;
3198 }
3199 else
3200 {
3201 WARN("interface %s not implemented\n", debugstr_guid(riid));
3202 *ppvObject = NULL;
3203 return E_NOINTERFACE;
3204 }
3205
3206 IUnknown_AddRef(iface);
3207
3208 return S_OK;
3209 }
3210
3211 static ULONG WINAPI xmlreaderinput_AddRef(IXmlReaderInput *iface)
3212 {
3213 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3214 ULONG ref = InterlockedIncrement(&This->ref);
3215 TRACE("(%p)->(%d)\n", This, ref);
3216 return ref;
3217 }
3218
3219 static ULONG WINAPI xmlreaderinput_Release(IXmlReaderInput *iface)
3220 {
3221 xmlreaderinput *This = impl_from_IXmlReaderInput(iface);
3222 LONG ref = InterlockedDecrement(&This->ref);
3223
3224 TRACE("(%p)->(%d)\n", This, ref);
3225
3226 if (ref == 0)
3227 {
3228 IMalloc *imalloc = This->imalloc;
3229 if (This->input) IUnknown_Release(This->input);
3230 if (This->stream) ISequentialStream_Release(This->stream);
3231 if (This->buffer) free_input_buffer(This->buffer);
3232 readerinput_free(This, This->baseuri);
3233 readerinput_free(This, This);
3234 if (imalloc) IMalloc_Release(imalloc);
3235 }
3236
3237 return ref;
3238 }
3239
3240 static const struct IUnknownVtbl xmlreaderinputvtbl =
3241 {
3242 xmlreaderinput_QueryInterface,
3243 xmlreaderinput_AddRef,
3244 xmlreaderinput_Release
3245 };
3246
3247 HRESULT WINAPI CreateXmlReader(REFIID riid, void **obj, IMalloc *imalloc)
3248 {
3249 xmlreader *reader;
3250 int i;
3251
3252 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid), obj, imalloc);
3253
3254 if (!IsEqualGUID(riid, &IID_IXmlReader))
3255 {
3256 ERR("Unexpected IID requested -> (%s)\n", wine_dbgstr_guid(riid));
3257 return E_FAIL;
3258 }
3259
3260 if (imalloc)
3261 reader = IMalloc_Alloc(imalloc, sizeof(*reader));
3262 else
3263 reader = heap_alloc(sizeof(*reader));
3264 if(!reader) return E_OUTOFMEMORY;
3265
3266 reader->IXmlReader_iface.lpVtbl = &xmlreader_vtbl;
3267 reader->ref = 1;
3268 reader->input = NULL;
3269 reader->state = XmlReadState_Closed;
3270 reader->instate = XmlReadInState_Initial;
3271 reader->resumestate = XmlReadResumeState_Initial;
3272 reader->dtdmode = DtdProcessing_Prohibit;
3273 reader->resolver = NULL;
3274 reader->mlang = NULL;
3275 reader->line = reader->pos = 0;
3276 reader->imalloc = imalloc;
3277 if (imalloc) IMalloc_AddRef(imalloc);
3278 reader->nodetype = XmlNodeType_None;
3279 list_init(&reader->attrs);
3280 reader->attr_count = 0;
3281 reader->attr = NULL;
3282 list_init(&reader->nsdef);
3283 list_init(&reader->ns);
3284 list_init(&reader->elements);
3285 reader->depth = 0;
3286 reader->max_depth = 256;
3287 reader->is_empty_element = FALSE;
3288 memset(reader->resume, 0, sizeof(reader->resume));
3289
3290 for (i = 0; i < StringValue_Last; i++)
3291 reader->strvalues[i] = strval_empty;
3292
3293 *obj = &reader->IXmlReader_iface;
3294
3295 TRACE("returning iface %p\n", *obj);
3296
3297 return S_OK;
3298 }
3299
3300 HRESULT WINAPI CreateXmlReaderInputWithEncodingName(IUnknown *stream,
3301 IMalloc *imalloc,
3302 LPCWSTR encoding,
3303 BOOL hint,
3304 LPCWSTR base_uri,
3305 IXmlReaderInput **ppInput)
3306 {
3307 xmlreaderinput *readerinput;
3308 HRESULT hr;
3309
3310 TRACE("%p %p %s %d %s %p\n", stream, imalloc, wine_dbgstr_w(encoding),
3311 hint, wine_dbgstr_w(base_uri), ppInput);
3312
3313 if (!stream || !ppInput) return E_INVALIDARG;
3314
3315 if (imalloc)
3316 readerinput = IMalloc_Alloc(imalloc, sizeof(*readerinput));
3317 else
3318 readerinput = heap_alloc(sizeof(*readerinput));
3319 if(!readerinput) return E_OUTOFMEMORY;
3320
3321 readerinput->IXmlReaderInput_iface.lpVtbl = &xmlreaderinputvtbl;
3322 readerinput->ref = 1;
3323 readerinput->imalloc = imalloc;
3324 readerinput->stream = NULL;
3325 if (imalloc) IMalloc_AddRef(imalloc);
3326 readerinput->encoding = parse_encoding_name(encoding, -1);
3327 readerinput->hint = hint;
3328 readerinput->baseuri = readerinput_strdupW(readerinput, base_uri);
3329 readerinput->pending = 0;
3330
3331 hr = alloc_input_buffer(readerinput);
3332 if (hr != S_OK)
3333 {
3334 readerinput_free(readerinput, readerinput->baseuri);
3335 readerinput_free(readerinput, readerinput);
3336 if (imalloc) IMalloc_Release(imalloc);
3337 return hr;
3338 }
3339 IUnknown_QueryInterface(stream, &IID_IUnknown, (void**)&readerinput->input);
3340
3341 *ppInput = &readerinput->IXmlReaderInput_iface;
3342
3343 TRACE("returning iface %p\n", *ppInput);
3344
3345 return S_OK;
3346 }